btrfs-progs: Fix return value bug of qgroups check
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
45 enum task_position {
46         TASK_EXTENTS,
47         TASK_FREE_SPACE,
48         TASK_FS_ROOTS,
49         TASK_NOTHING, /* have to be the last element */
50 };
51
52 struct task_ctx {
53         int progress_enabled;
54         enum task_position tp;
55
56         struct task_info *info;
57 };
58
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int repair = 0;
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
78 struct extent_backref {
79         struct list_head list;
80         unsigned int is_data:1;
81         unsigned int found_extent_tree:1;
82         unsigned int full_backref:1;
83         unsigned int found_ref:1;
84         unsigned int broken:1;
85 };
86
87 struct data_backref {
88         struct extent_backref node;
89         union {
90                 u64 parent;
91                 u64 root;
92         };
93         u64 owner;
94         u64 offset;
95         u64 disk_bytenr;
96         u64 bytes;
97         u64 ram_bytes;
98         u32 num_refs;
99         u32 found_ref;
100 };
101
102 /*
103  * Much like data_backref, just removed the undetermined members
104  * and change it to use list_head.
105  * During extent scan, it is stored in root->orphan_data_extent.
106  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
107  */
108 struct orphan_data_extent {
109         struct list_head list;
110         u64 root;
111         u64 objectid;
112         u64 offset;
113         u64 disk_bytenr;
114         u64 disk_len;
115 };
116
117 struct tree_backref {
118         struct extent_backref node;
119         union {
120                 u64 parent;
121                 u64 root;
122         };
123 };
124
125 struct extent_record {
126         struct list_head backrefs;
127         struct list_head dups;
128         struct list_head list;
129         struct cache_extent cache;
130         struct btrfs_disk_key parent_key;
131         u64 start;
132         u64 max_size;
133         u64 nr;
134         u64 refs;
135         u64 extent_item_refs;
136         u64 generation;
137         u64 parent_generation;
138         u64 info_objectid;
139         u32 num_duplicates;
140         u8 info_level;
141         int flag_block_full_backref;
142         unsigned int found_rec:1;
143         unsigned int content_checked:1;
144         unsigned int owner_ref_checked:1;
145         unsigned int is_root:1;
146         unsigned int metadata:1;
147         unsigned int bad_full_backref:1;
148         unsigned int crossing_stripes:1;
149         unsigned int wrong_chunk_type:1;
150 };
151
152 struct inode_backref {
153         struct list_head list;
154         unsigned int found_dir_item:1;
155         unsigned int found_dir_index:1;
156         unsigned int found_inode_ref:1;
157         unsigned int filetype:8;
158         int errors;
159         unsigned int ref_type;
160         u64 dir;
161         u64 index;
162         u16 namelen;
163         char name[0];
164 };
165
166 struct root_item_record {
167         struct list_head list;
168         u64 objectid;
169         u64 bytenr;
170         u64 last_snapshot;
171         u8 level;
172         u8 drop_level;
173         int level_size;
174         struct btrfs_key drop_key;
175 };
176
177 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
178 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
179 #define REF_ERR_NO_INODE_REF            (1 << 2)
180 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
181 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
182 #define REF_ERR_DUP_INODE_REF           (1 << 5)
183 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
184 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
185 #define REF_ERR_NAME_TOO_LONG           (1 << 8) // 100
186 #define REF_ERR_NO_ROOT_REF             (1 << 9)
187 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
188 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
189 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
190
191 struct file_extent_hole {
192         struct rb_node node;
193         u64 start;
194         u64 len;
195 };
196
197 struct inode_record {
198         struct list_head backrefs;
199         unsigned int checked:1;
200         unsigned int merging:1;
201         unsigned int found_inode_item:1;
202         unsigned int found_dir_item:1;
203         unsigned int found_file_extent:1;
204         unsigned int found_csum_item:1;
205         unsigned int some_csum_missing:1;
206         unsigned int nodatasum:1;
207         int errors;
208
209         u64 ino;
210         u32 nlink;
211         u32 imode;
212         u64 isize;
213         u64 nbytes;
214
215         u32 found_link;
216         u64 found_size;
217         u64 extent_start;
218         u64 extent_end;
219         struct rb_root holes;
220         struct list_head orphan_extents;
221
222         u32 refs;
223 };
224
225 #define I_ERR_NO_INODE_ITEM             (1 << 0)
226 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
227 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
228 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
229 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
230 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
231 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
232 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
233 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) // 100
234 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
235 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) // 400
236 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
237 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
238 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
239 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
240
241 struct root_backref {
242         struct list_head list;
243         unsigned int found_dir_item:1;
244         unsigned int found_dir_index:1;
245         unsigned int found_back_ref:1;
246         unsigned int found_forward_ref:1;
247         unsigned int reachable:1;
248         int errors;
249         u64 ref_root;
250         u64 dir;
251         u64 index;
252         u16 namelen;
253         char name[0];
254 };
255
256 struct root_record {
257         struct list_head backrefs;
258         struct cache_extent cache;
259         unsigned int found_root_item:1;
260         u64 objectid;
261         u32 found_ref;
262 };
263
264 struct ptr_node {
265         struct cache_extent cache;
266         void *data;
267 };
268
269 struct shared_node {
270         struct cache_extent cache;
271         struct cache_tree root_cache;
272         struct cache_tree inode_cache;
273         struct inode_record *current;
274         u32 refs;
275 };
276
277 struct block_info {
278         u64 start;
279         u32 size;
280 };
281
282 struct walk_control {
283         struct cache_tree shared;
284         struct shared_node *nodes[BTRFS_MAX_LEVEL];
285         int active_node;
286         int root_level;
287 };
288
289 struct bad_item {
290         struct btrfs_key key;
291         u64 root_id;
292         struct list_head list;
293 };
294
295 struct extent_entry {
296         u64 bytenr;
297         u64 bytes;
298         int count;
299         int broken;
300         struct list_head list;
301 };
302
303 struct root_item_info {
304         /* level of the root */
305         u8 level;
306         /* number of nodes at this level, must be 1 for a root */
307         int node_count;
308         u64 bytenr;
309         u64 gen;
310         struct cache_extent cache_extent;
311 };
312
313 static void *print_status_check(void *p)
314 {
315         struct task_ctx *priv = p;
316         const char work_indicator[] = { '.', 'o', 'O', 'o' };
317         uint32_t count = 0;
318         static char *task_position_string[] = {
319                 "checking extents",
320                 "checking free space cache",
321                 "checking fs roots",
322         };
323
324         task_period_start(priv->info, 1000 /* 1s */);
325
326         if (priv->tp == TASK_NOTHING)
327                 return NULL;
328
329         while (1) {
330                 printf("%s [%c]\r", task_position_string[priv->tp],
331                                 work_indicator[count % 4]);
332                 count++;
333                 fflush(stdout);
334                 task_period_wait(priv->info);
335         }
336         return NULL;
337 }
338
339 static int print_status_return(void *p)
340 {
341         printf("\n");
342         fflush(stdout);
343
344         return 0;
345 }
346
347 /* Compatible function to allow reuse of old codes */
348 static u64 first_extent_gap(struct rb_root *holes)
349 {
350         struct file_extent_hole *hole;
351
352         if (RB_EMPTY_ROOT(holes))
353                 return (u64)-1;
354
355         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
356         return hole->start;
357 }
358
359 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
360 {
361         struct file_extent_hole *hole1;
362         struct file_extent_hole *hole2;
363
364         hole1 = rb_entry(node1, struct file_extent_hole, node);
365         hole2 = rb_entry(node2, struct file_extent_hole, node);
366
367         if (hole1->start > hole2->start)
368                 return -1;
369         if (hole1->start < hole2->start)
370                 return 1;
371         /* Now hole1->start == hole2->start */
372         if (hole1->len >= hole2->len)
373                 /*
374                  * Hole 1 will be merge center
375                  * Same hole will be merged later
376                  */
377                 return -1;
378         /* Hole 2 will be merge center */
379         return 1;
380 }
381
382 /*
383  * Add a hole to the record
384  *
385  * This will do hole merge for copy_file_extent_holes(),
386  * which will ensure there won't be continuous holes.
387  */
388 static int add_file_extent_hole(struct rb_root *holes,
389                                 u64 start, u64 len)
390 {
391         struct file_extent_hole *hole;
392         struct file_extent_hole *prev = NULL;
393         struct file_extent_hole *next = NULL;
394
395         hole = malloc(sizeof(*hole));
396         if (!hole)
397                 return -ENOMEM;
398         hole->start = start;
399         hole->len = len;
400         /* Since compare will not return 0, no -EEXIST will happen */
401         rb_insert(holes, &hole->node, compare_hole);
402
403         /* simple merge with previous hole */
404         if (rb_prev(&hole->node))
405                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
406                                 node);
407         if (prev && prev->start + prev->len >= hole->start) {
408                 hole->len = hole->start + hole->len - prev->start;
409                 hole->start = prev->start;
410                 rb_erase(&prev->node, holes);
411                 free(prev);
412                 prev = NULL;
413         }
414
415         /* iterate merge with next holes */
416         while (1) {
417                 if (!rb_next(&hole->node))
418                         break;
419                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
420                                         node);
421                 if (hole->start + hole->len >= next->start) {
422                         if (hole->start + hole->len <= next->start + next->len)
423                                 hole->len = next->start + next->len -
424                                             hole->start;
425                         rb_erase(&next->node, holes);
426                         free(next);
427                         next = NULL;
428                 } else
429                         break;
430         }
431         return 0;
432 }
433
434 static int compare_hole_range(struct rb_node *node, void *data)
435 {
436         struct file_extent_hole *hole;
437         u64 start;
438
439         hole = (struct file_extent_hole *)data;
440         start = hole->start;
441
442         hole = rb_entry(node, struct file_extent_hole, node);
443         if (start < hole->start)
444                 return -1;
445         if (start >= hole->start && start < hole->start + hole->len)
446                 return 0;
447         return 1;
448 }
449
450 /*
451  * Delete a hole in the record
452  *
453  * This will do the hole split and is much restrict than add.
454  */
455 static int del_file_extent_hole(struct rb_root *holes,
456                                 u64 start, u64 len)
457 {
458         struct file_extent_hole *hole;
459         struct file_extent_hole tmp;
460         u64 prev_start = 0;
461         u64 prev_len = 0;
462         u64 next_start = 0;
463         u64 next_len = 0;
464         struct rb_node *node;
465         int have_prev = 0;
466         int have_next = 0;
467         int ret = 0;
468
469         tmp.start = start;
470         tmp.len = len;
471         node = rb_search(holes, &tmp, compare_hole_range, NULL);
472         if (!node)
473                 return -EEXIST;
474         hole = rb_entry(node, struct file_extent_hole, node);
475         if (start + len > hole->start + hole->len)
476                 return -EEXIST;
477
478         /*
479          * Now there will be no overflap, delete the hole and re-add the
480          * split(s) if they exists.
481          */
482         if (start > hole->start) {
483                 prev_start = hole->start;
484                 prev_len = start - hole->start;
485                 have_prev = 1;
486         }
487         if (hole->start + hole->len > start + len) {
488                 next_start = start + len;
489                 next_len = hole->start + hole->len - start - len;
490                 have_next = 1;
491         }
492         rb_erase(node, holes);
493         free(hole);
494         if (have_prev) {
495                 ret = add_file_extent_hole(holes, prev_start, prev_len);
496                 if (ret < 0)
497                         return ret;
498         }
499         if (have_next) {
500                 ret = add_file_extent_hole(holes, next_start, next_len);
501                 if (ret < 0)
502                         return ret;
503         }
504         return 0;
505 }
506
507 static int copy_file_extent_holes(struct rb_root *dst,
508                                   struct rb_root *src)
509 {
510         struct file_extent_hole *hole;
511         struct rb_node *node;
512         int ret = 0;
513
514         node = rb_first(src);
515         while (node) {
516                 hole = rb_entry(node, struct file_extent_hole, node);
517                 ret = add_file_extent_hole(dst, hole->start, hole->len);
518                 if (ret)
519                         break;
520                 node = rb_next(node);
521         }
522         return ret;
523 }
524
525 static void free_file_extent_holes(struct rb_root *holes)
526 {
527         struct rb_node *node;
528         struct file_extent_hole *hole;
529
530         node = rb_first(holes);
531         while (node) {
532                 hole = rb_entry(node, struct file_extent_hole, node);
533                 rb_erase(node, holes);
534                 free(hole);
535                 node = rb_first(holes);
536         }
537 }
538
539 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
540
541 static void record_root_in_trans(struct btrfs_trans_handle *trans,
542                                  struct btrfs_root *root)
543 {
544         if (root->last_trans != trans->transid) {
545                 root->track_dirty = 1;
546                 root->last_trans = trans->transid;
547                 root->commit_root = root->node;
548                 extent_buffer_get(root->node);
549         }
550 }
551
552 static u8 imode_to_type(u32 imode)
553 {
554 #define S_SHIFT 12
555         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
556                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
557                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
558                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
559                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
560                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
561                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
562                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
563         };
564
565         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
566 #undef S_SHIFT
567 }
568
569 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
570 {
571         struct device_record *rec1;
572         struct device_record *rec2;
573
574         rec1 = rb_entry(node1, struct device_record, node);
575         rec2 = rb_entry(node2, struct device_record, node);
576         if (rec1->devid > rec2->devid)
577                 return -1;
578         else if (rec1->devid < rec2->devid)
579                 return 1;
580         else
581                 return 0;
582 }
583
584 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
585 {
586         struct inode_record *rec;
587         struct inode_backref *backref;
588         struct inode_backref *orig;
589         struct inode_backref *tmp;
590         struct orphan_data_extent *src_orphan;
591         struct orphan_data_extent *dst_orphan;
592         size_t size;
593         int ret;
594
595         rec = malloc(sizeof(*rec));
596         if (!rec)
597                 return ERR_PTR(-ENOMEM);
598         memcpy(rec, orig_rec, sizeof(*rec));
599         rec->refs = 1;
600         INIT_LIST_HEAD(&rec->backrefs);
601         INIT_LIST_HEAD(&rec->orphan_extents);
602         rec->holes = RB_ROOT;
603
604         list_for_each_entry(orig, &orig_rec->backrefs, list) {
605                 size = sizeof(*orig) + orig->namelen + 1;
606                 backref = malloc(size);
607                 if (!backref) {
608                         ret = -ENOMEM;
609                         goto cleanup;
610                 }
611                 memcpy(backref, orig, size);
612                 list_add_tail(&backref->list, &rec->backrefs);
613         }
614         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
615                 dst_orphan = malloc(sizeof(*dst_orphan));
616                 if (!dst_orphan) {
617                         ret = -ENOMEM;
618                         goto cleanup;
619                 }
620                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
621                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
622         }
623         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
624         BUG_ON(ret < 0);
625
626         return rec;
627
628 cleanup:
629         if (!list_empty(&rec->backrefs))
630                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
631                         list_del(&orig->list);
632                         free(orig);
633                 }
634
635         if (!list_empty(&rec->orphan_extents))
636                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
637                         list_del(&orig->list);
638                         free(orig);
639                 }
640
641         free(rec);
642
643         return ERR_PTR(ret);
644 }
645
646 static void print_orphan_data_extents(struct list_head *orphan_extents,
647                                       u64 objectid)
648 {
649         struct orphan_data_extent *orphan;
650
651         if (list_empty(orphan_extents))
652                 return;
653         printf("The following data extent is lost in tree %llu:\n",
654                objectid);
655         list_for_each_entry(orphan, orphan_extents, list) {
656                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
657                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
658                        orphan->disk_len);
659         }
660 }
661
662 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
663 {
664         u64 root_objectid = root->root_key.objectid;
665         int errors = rec->errors;
666
667         if (!errors)
668                 return;
669         /* reloc root errors, we print its corresponding fs root objectid*/
670         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
671                 root_objectid = root->root_key.offset;
672                 fprintf(stderr, "reloc");
673         }
674         fprintf(stderr, "root %llu inode %llu errors %x",
675                 (unsigned long long) root_objectid,
676                 (unsigned long long) rec->ino, rec->errors);
677
678         if (errors & I_ERR_NO_INODE_ITEM)
679                 fprintf(stderr, ", no inode item");
680         if (errors & I_ERR_NO_ORPHAN_ITEM)
681                 fprintf(stderr, ", no orphan item");
682         if (errors & I_ERR_DUP_INODE_ITEM)
683                 fprintf(stderr, ", dup inode item");
684         if (errors & I_ERR_DUP_DIR_INDEX)
685                 fprintf(stderr, ", dup dir index");
686         if (errors & I_ERR_ODD_DIR_ITEM)
687                 fprintf(stderr, ", odd dir item");
688         if (errors & I_ERR_ODD_FILE_EXTENT)
689                 fprintf(stderr, ", odd file extent");
690         if (errors & I_ERR_BAD_FILE_EXTENT)
691                 fprintf(stderr, ", bad file extent");
692         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
693                 fprintf(stderr, ", file extent overlap");
694         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
695                 fprintf(stderr, ", file extent discount");
696         if (errors & I_ERR_DIR_ISIZE_WRONG)
697                 fprintf(stderr, ", dir isize wrong");
698         if (errors & I_ERR_FILE_NBYTES_WRONG)
699                 fprintf(stderr, ", nbytes wrong");
700         if (errors & I_ERR_ODD_CSUM_ITEM)
701                 fprintf(stderr, ", odd csum item");
702         if (errors & I_ERR_SOME_CSUM_MISSING)
703                 fprintf(stderr, ", some csum missing");
704         if (errors & I_ERR_LINK_COUNT_WRONG)
705                 fprintf(stderr, ", link count wrong");
706         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
707                 fprintf(stderr, ", orphan file extent");
708         fprintf(stderr, "\n");
709         /* Print the orphan extents if needed */
710         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
711                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
712
713         /* Print the holes if needed */
714         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
715                 struct file_extent_hole *hole;
716                 struct rb_node *node;
717                 int found = 0;
718
719                 node = rb_first(&rec->holes);
720                 fprintf(stderr, "Found file extent holes:\n");
721                 while (node) {
722                         found = 1;
723                         hole = rb_entry(node, struct file_extent_hole, node);
724                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
725                                 hole->start, hole->len);
726                         node = rb_next(node);
727                 }
728                 if (!found)
729                         fprintf(stderr, "\tstart: 0, len: %llu\n",
730                                 round_up(rec->isize, root->sectorsize));
731         }
732 }
733
734 static void print_ref_error(int errors)
735 {
736         if (errors & REF_ERR_NO_DIR_ITEM)
737                 fprintf(stderr, ", no dir item");
738         if (errors & REF_ERR_NO_DIR_INDEX)
739                 fprintf(stderr, ", no dir index");
740         if (errors & REF_ERR_NO_INODE_REF)
741                 fprintf(stderr, ", no inode ref");
742         if (errors & REF_ERR_DUP_DIR_ITEM)
743                 fprintf(stderr, ", dup dir item");
744         if (errors & REF_ERR_DUP_DIR_INDEX)
745                 fprintf(stderr, ", dup dir index");
746         if (errors & REF_ERR_DUP_INODE_REF)
747                 fprintf(stderr, ", dup inode ref");
748         if (errors & REF_ERR_INDEX_UNMATCH)
749                 fprintf(stderr, ", index unmatch");
750         if (errors & REF_ERR_FILETYPE_UNMATCH)
751                 fprintf(stderr, ", filetype unmatch");
752         if (errors & REF_ERR_NAME_TOO_LONG)
753                 fprintf(stderr, ", name too long");
754         if (errors & REF_ERR_NO_ROOT_REF)
755                 fprintf(stderr, ", no root ref");
756         if (errors & REF_ERR_NO_ROOT_BACKREF)
757                 fprintf(stderr, ", no root backref");
758         if (errors & REF_ERR_DUP_ROOT_REF)
759                 fprintf(stderr, ", dup root ref");
760         if (errors & REF_ERR_DUP_ROOT_BACKREF)
761                 fprintf(stderr, ", dup root backref");
762         fprintf(stderr, "\n");
763 }
764
765 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
766                                           u64 ino, int mod)
767 {
768         struct ptr_node *node;
769         struct cache_extent *cache;
770         struct inode_record *rec = NULL;
771         int ret;
772
773         cache = lookup_cache_extent(inode_cache, ino, 1);
774         if (cache) {
775                 node = container_of(cache, struct ptr_node, cache);
776                 rec = node->data;
777                 if (mod && rec->refs > 1) {
778                         node->data = clone_inode_rec(rec);
779                         if (IS_ERR(node->data))
780                                 return node->data;
781                         rec->refs--;
782                         rec = node->data;
783                 }
784         } else if (mod) {
785                 rec = calloc(1, sizeof(*rec));
786                 if (!rec)
787                         return ERR_PTR(-ENOMEM);
788                 rec->ino = ino;
789                 rec->extent_start = (u64)-1;
790                 rec->refs = 1;
791                 INIT_LIST_HEAD(&rec->backrefs);
792                 INIT_LIST_HEAD(&rec->orphan_extents);
793                 rec->holes = RB_ROOT;
794
795                 node = malloc(sizeof(*node));
796                 if (!node) {
797                         free(rec);
798                         return ERR_PTR(-ENOMEM);
799                 }
800                 node->cache.start = ino;
801                 node->cache.size = 1;
802                 node->data = rec;
803
804                 if (ino == BTRFS_FREE_INO_OBJECTID)
805                         rec->found_link = 1;
806
807                 ret = insert_cache_extent(inode_cache, &node->cache);
808                 if (ret)
809                         return ERR_PTR(-EEXIST);
810         }
811         return rec;
812 }
813
814 static void free_orphan_data_extents(struct list_head *orphan_extents)
815 {
816         struct orphan_data_extent *orphan;
817
818         while (!list_empty(orphan_extents)) {
819                 orphan = list_entry(orphan_extents->next,
820                                     struct orphan_data_extent, list);
821                 list_del(&orphan->list);
822                 free(orphan);
823         }
824 }
825
826 static void free_inode_rec(struct inode_record *rec)
827 {
828         struct inode_backref *backref;
829
830         if (--rec->refs > 0)
831                 return;
832
833         while (!list_empty(&rec->backrefs)) {
834                 backref = list_entry(rec->backrefs.next,
835                                      struct inode_backref, list);
836                 list_del(&backref->list);
837                 free(backref);
838         }
839         free_orphan_data_extents(&rec->orphan_extents);
840         free_file_extent_holes(&rec->holes);
841         free(rec);
842 }
843
844 static int can_free_inode_rec(struct inode_record *rec)
845 {
846         if (!rec->errors && rec->checked && rec->found_inode_item &&
847             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
848                 return 1;
849         return 0;
850 }
851
852 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
853                                  struct inode_record *rec)
854 {
855         struct cache_extent *cache;
856         struct inode_backref *tmp, *backref;
857         struct ptr_node *node;
858         unsigned char filetype;
859
860         if (!rec->found_inode_item)
861                 return;
862
863         filetype = imode_to_type(rec->imode);
864         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
865                 if (backref->found_dir_item && backref->found_dir_index) {
866                         if (backref->filetype != filetype)
867                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
868                         if (!backref->errors && backref->found_inode_ref &&
869                             rec->nlink == rec->found_link) {
870                                 list_del(&backref->list);
871                                 free(backref);
872                         }
873                 }
874         }
875
876         if (!rec->checked || rec->merging)
877                 return;
878
879         if (S_ISDIR(rec->imode)) {
880                 if (rec->found_size != rec->isize)
881                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
882                 if (rec->found_file_extent)
883                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
884         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
885                 if (rec->found_dir_item)
886                         rec->errors |= I_ERR_ODD_DIR_ITEM;
887                 if (rec->found_size != rec->nbytes)
888                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
889                 if (rec->nlink > 0 && !no_holes &&
890                     (rec->extent_end < rec->isize ||
891                      first_extent_gap(&rec->holes) < rec->isize))
892                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
893         }
894
895         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
896                 if (rec->found_csum_item && rec->nodatasum)
897                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
898                 if (rec->some_csum_missing && !rec->nodatasum)
899                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
900         }
901
902         BUG_ON(rec->refs != 1);
903         if (can_free_inode_rec(rec)) {
904                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
905                 node = container_of(cache, struct ptr_node, cache);
906                 BUG_ON(node->data != rec);
907                 remove_cache_extent(inode_cache, &node->cache);
908                 free(node);
909                 free_inode_rec(rec);
910         }
911 }
912
913 static int check_orphan_item(struct btrfs_root *root, u64 ino)
914 {
915         struct btrfs_path path;
916         struct btrfs_key key;
917         int ret;
918
919         key.objectid = BTRFS_ORPHAN_OBJECTID;
920         key.type = BTRFS_ORPHAN_ITEM_KEY;
921         key.offset = ino;
922
923         btrfs_init_path(&path);
924         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
925         btrfs_release_path(&path);
926         if (ret > 0)
927                 ret = -ENOENT;
928         return ret;
929 }
930
931 static int process_inode_item(struct extent_buffer *eb,
932                               int slot, struct btrfs_key *key,
933                               struct shared_node *active_node)
934 {
935         struct inode_record *rec;
936         struct btrfs_inode_item *item;
937
938         rec = active_node->current;
939         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
940         if (rec->found_inode_item) {
941                 rec->errors |= I_ERR_DUP_INODE_ITEM;
942                 return 1;
943         }
944         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
945         rec->nlink = btrfs_inode_nlink(eb, item);
946         rec->isize = btrfs_inode_size(eb, item);
947         rec->nbytes = btrfs_inode_nbytes(eb, item);
948         rec->imode = btrfs_inode_mode(eb, item);
949         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
950                 rec->nodatasum = 1;
951         rec->found_inode_item = 1;
952         if (rec->nlink == 0)
953                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
954         maybe_free_inode_rec(&active_node->inode_cache, rec);
955         return 0;
956 }
957
958 static struct inode_backref *get_inode_backref(struct inode_record *rec,
959                                                 const char *name,
960                                                 int namelen, u64 dir)
961 {
962         struct inode_backref *backref;
963
964         list_for_each_entry(backref, &rec->backrefs, list) {
965                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
966                         break;
967                 if (backref->dir != dir || backref->namelen != namelen)
968                         continue;
969                 if (memcmp(name, backref->name, namelen))
970                         continue;
971                 return backref;
972         }
973
974         backref = malloc(sizeof(*backref) + namelen + 1);
975         if (!backref)
976                 return NULL;
977         memset(backref, 0, sizeof(*backref));
978         backref->dir = dir;
979         backref->namelen = namelen;
980         memcpy(backref->name, name, namelen);
981         backref->name[namelen] = '\0';
982         list_add_tail(&backref->list, &rec->backrefs);
983         return backref;
984 }
985
986 static int add_inode_backref(struct cache_tree *inode_cache,
987                              u64 ino, u64 dir, u64 index,
988                              const char *name, int namelen,
989                              int filetype, int itemtype, int errors)
990 {
991         struct inode_record *rec;
992         struct inode_backref *backref;
993
994         rec = get_inode_rec(inode_cache, ino, 1);
995         BUG_ON(IS_ERR(rec));
996         backref = get_inode_backref(rec, name, namelen, dir);
997         BUG_ON(!backref);
998         if (errors)
999                 backref->errors |= errors;
1000         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1001                 if (backref->found_dir_index)
1002                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1003                 if (backref->found_inode_ref && backref->index != index)
1004                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1005                 if (backref->found_dir_item && backref->filetype != filetype)
1006                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1007
1008                 backref->index = index;
1009                 backref->filetype = filetype;
1010                 backref->found_dir_index = 1;
1011         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1012                 rec->found_link++;
1013                 if (backref->found_dir_item)
1014                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1015                 if (backref->found_dir_index && backref->filetype != filetype)
1016                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1017
1018                 backref->filetype = filetype;
1019                 backref->found_dir_item = 1;
1020         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1021                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1022                 if (backref->found_inode_ref)
1023                         backref->errors |= REF_ERR_DUP_INODE_REF;
1024                 if (backref->found_dir_index && backref->index != index)
1025                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1026                 else
1027                         backref->index = index;
1028
1029                 backref->ref_type = itemtype;
1030                 backref->found_inode_ref = 1;
1031         } else {
1032                 BUG_ON(1);
1033         }
1034
1035         maybe_free_inode_rec(inode_cache, rec);
1036         return 0;
1037 }
1038
1039 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1040                             struct cache_tree *dst_cache)
1041 {
1042         struct inode_backref *backref;
1043         u32 dir_count = 0;
1044         int ret = 0;
1045
1046         dst->merging = 1;
1047         list_for_each_entry(backref, &src->backrefs, list) {
1048                 if (backref->found_dir_index) {
1049                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1050                                         backref->index, backref->name,
1051                                         backref->namelen, backref->filetype,
1052                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1053                 }
1054                 if (backref->found_dir_item) {
1055                         dir_count++;
1056                         add_inode_backref(dst_cache, dst->ino,
1057                                         backref->dir, 0, backref->name,
1058                                         backref->namelen, backref->filetype,
1059                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1060                 }
1061                 if (backref->found_inode_ref) {
1062                         add_inode_backref(dst_cache, dst->ino,
1063                                         backref->dir, backref->index,
1064                                         backref->name, backref->namelen, 0,
1065                                         backref->ref_type, backref->errors);
1066                 }
1067         }
1068
1069         if (src->found_dir_item)
1070                 dst->found_dir_item = 1;
1071         if (src->found_file_extent)
1072                 dst->found_file_extent = 1;
1073         if (src->found_csum_item)
1074                 dst->found_csum_item = 1;
1075         if (src->some_csum_missing)
1076                 dst->some_csum_missing = 1;
1077         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1078                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1079                 if (ret < 0)
1080                         return ret;
1081         }
1082
1083         BUG_ON(src->found_link < dir_count);
1084         dst->found_link += src->found_link - dir_count;
1085         dst->found_size += src->found_size;
1086         if (src->extent_start != (u64)-1) {
1087                 if (dst->extent_start == (u64)-1) {
1088                         dst->extent_start = src->extent_start;
1089                         dst->extent_end = src->extent_end;
1090                 } else {
1091                         if (dst->extent_end > src->extent_start)
1092                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1093                         else if (dst->extent_end < src->extent_start) {
1094                                 ret = add_file_extent_hole(&dst->holes,
1095                                         dst->extent_end,
1096                                         src->extent_start - dst->extent_end);
1097                         }
1098                         if (dst->extent_end < src->extent_end)
1099                                 dst->extent_end = src->extent_end;
1100                 }
1101         }
1102
1103         dst->errors |= src->errors;
1104         if (src->found_inode_item) {
1105                 if (!dst->found_inode_item) {
1106                         dst->nlink = src->nlink;
1107                         dst->isize = src->isize;
1108                         dst->nbytes = src->nbytes;
1109                         dst->imode = src->imode;
1110                         dst->nodatasum = src->nodatasum;
1111                         dst->found_inode_item = 1;
1112                 } else {
1113                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1114                 }
1115         }
1116         dst->merging = 0;
1117
1118         return 0;
1119 }
1120
1121 static int splice_shared_node(struct shared_node *src_node,
1122                               struct shared_node *dst_node)
1123 {
1124         struct cache_extent *cache;
1125         struct ptr_node *node, *ins;
1126         struct cache_tree *src, *dst;
1127         struct inode_record *rec, *conflict;
1128         u64 current_ino = 0;
1129         int splice = 0;
1130         int ret;
1131
1132         if (--src_node->refs == 0)
1133                 splice = 1;
1134         if (src_node->current)
1135                 current_ino = src_node->current->ino;
1136
1137         src = &src_node->root_cache;
1138         dst = &dst_node->root_cache;
1139 again:
1140         cache = search_cache_extent(src, 0);
1141         while (cache) {
1142                 node = container_of(cache, struct ptr_node, cache);
1143                 rec = node->data;
1144                 cache = next_cache_extent(cache);
1145
1146                 if (splice) {
1147                         remove_cache_extent(src, &node->cache);
1148                         ins = node;
1149                 } else {
1150                         ins = malloc(sizeof(*ins));
1151                         BUG_ON(!ins);
1152                         ins->cache.start = node->cache.start;
1153                         ins->cache.size = node->cache.size;
1154                         ins->data = rec;
1155                         rec->refs++;
1156                 }
1157                 ret = insert_cache_extent(dst, &ins->cache);
1158                 if (ret == -EEXIST) {
1159                         conflict = get_inode_rec(dst, rec->ino, 1);
1160                         BUG_ON(IS_ERR(conflict));
1161                         merge_inode_recs(rec, conflict, dst);
1162                         if (rec->checked) {
1163                                 conflict->checked = 1;
1164                                 if (dst_node->current == conflict)
1165                                         dst_node->current = NULL;
1166                         }
1167                         maybe_free_inode_rec(dst, conflict);
1168                         free_inode_rec(rec);
1169                         free(ins);
1170                 } else {
1171                         BUG_ON(ret);
1172                 }
1173         }
1174
1175         if (src == &src_node->root_cache) {
1176                 src = &src_node->inode_cache;
1177                 dst = &dst_node->inode_cache;
1178                 goto again;
1179         }
1180
1181         if (current_ino > 0 && (!dst_node->current ||
1182             current_ino > dst_node->current->ino)) {
1183                 if (dst_node->current) {
1184                         dst_node->current->checked = 1;
1185                         maybe_free_inode_rec(dst, dst_node->current);
1186                 }
1187                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1188                 BUG_ON(IS_ERR(dst_node->current));
1189         }
1190         return 0;
1191 }
1192
1193 static void free_inode_ptr(struct cache_extent *cache)
1194 {
1195         struct ptr_node *node;
1196         struct inode_record *rec;
1197
1198         node = container_of(cache, struct ptr_node, cache);
1199         rec = node->data;
1200         free_inode_rec(rec);
1201         free(node);
1202 }
1203
1204 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1205
1206 static struct shared_node *find_shared_node(struct cache_tree *shared,
1207                                             u64 bytenr)
1208 {
1209         struct cache_extent *cache;
1210         struct shared_node *node;
1211
1212         cache = lookup_cache_extent(shared, bytenr, 1);
1213         if (cache) {
1214                 node = container_of(cache, struct shared_node, cache);
1215                 return node;
1216         }
1217         return NULL;
1218 }
1219
1220 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1221 {
1222         int ret;
1223         struct shared_node *node;
1224
1225         node = calloc(1, sizeof(*node));
1226         if (!node)
1227                 return -ENOMEM;
1228         node->cache.start = bytenr;
1229         node->cache.size = 1;
1230         cache_tree_init(&node->root_cache);
1231         cache_tree_init(&node->inode_cache);
1232         node->refs = refs;
1233
1234         ret = insert_cache_extent(shared, &node->cache);
1235
1236         return ret;
1237 }
1238
1239 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1240                              struct walk_control *wc, int level)
1241 {
1242         struct shared_node *node;
1243         struct shared_node *dest;
1244         int ret;
1245
1246         if (level == wc->active_node)
1247                 return 0;
1248
1249         BUG_ON(wc->active_node <= level);
1250         node = find_shared_node(&wc->shared, bytenr);
1251         if (!node) {
1252                 ret = add_shared_node(&wc->shared, bytenr, refs);
1253                 BUG_ON(ret);
1254                 node = find_shared_node(&wc->shared, bytenr);
1255                 wc->nodes[level] = node;
1256                 wc->active_node = level;
1257                 return 0;
1258         }
1259
1260         if (wc->root_level == wc->active_node &&
1261             btrfs_root_refs(&root->root_item) == 0) {
1262                 if (--node->refs == 0) {
1263                         free_inode_recs_tree(&node->root_cache);
1264                         free_inode_recs_tree(&node->inode_cache);
1265                         remove_cache_extent(&wc->shared, &node->cache);
1266                         free(node);
1267                 }
1268                 return 1;
1269         }
1270
1271         dest = wc->nodes[wc->active_node];
1272         splice_shared_node(node, dest);
1273         if (node->refs == 0) {
1274                 remove_cache_extent(&wc->shared, &node->cache);
1275                 free(node);
1276         }
1277         return 1;
1278 }
1279
1280 static int leave_shared_node(struct btrfs_root *root,
1281                              struct walk_control *wc, int level)
1282 {
1283         struct shared_node *node;
1284         struct shared_node *dest;
1285         int i;
1286
1287         if (level == wc->root_level)
1288                 return 0;
1289
1290         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1291                 if (wc->nodes[i])
1292                         break;
1293         }
1294         BUG_ON(i >= BTRFS_MAX_LEVEL);
1295
1296         node = wc->nodes[wc->active_node];
1297         wc->nodes[wc->active_node] = NULL;
1298         wc->active_node = i;
1299
1300         dest = wc->nodes[wc->active_node];
1301         if (wc->active_node < wc->root_level ||
1302             btrfs_root_refs(&root->root_item) > 0) {
1303                 BUG_ON(node->refs <= 1);
1304                 splice_shared_node(node, dest);
1305         } else {
1306                 BUG_ON(node->refs < 2);
1307                 node->refs--;
1308         }
1309         return 0;
1310 }
1311
1312 /*
1313  * Returns:
1314  * < 0 - on error
1315  * 1   - if the root with id child_root_id is a child of root parent_root_id
1316  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1317  *       has other root(s) as parent(s)
1318  * 2   - if the root child_root_id doesn't have any parent roots
1319  */
1320 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1321                          u64 child_root_id)
1322 {
1323         struct btrfs_path path;
1324         struct btrfs_key key;
1325         struct extent_buffer *leaf;
1326         int has_parent = 0;
1327         int ret;
1328
1329         btrfs_init_path(&path);
1330
1331         key.objectid = parent_root_id;
1332         key.type = BTRFS_ROOT_REF_KEY;
1333         key.offset = child_root_id;
1334         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1335                                 0, 0);
1336         if (ret < 0)
1337                 return ret;
1338         btrfs_release_path(&path);
1339         if (!ret)
1340                 return 1;
1341
1342         key.objectid = child_root_id;
1343         key.type = BTRFS_ROOT_BACKREF_KEY;
1344         key.offset = 0;
1345         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1346                                 0, 0);
1347         if (ret < 0)
1348                 goto out;
1349
1350         while (1) {
1351                 leaf = path.nodes[0];
1352                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1353                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1354                         if (ret)
1355                                 break;
1356                         leaf = path.nodes[0];
1357                 }
1358
1359                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1360                 if (key.objectid != child_root_id ||
1361                     key.type != BTRFS_ROOT_BACKREF_KEY)
1362                         break;
1363
1364                 has_parent = 1;
1365
1366                 if (key.offset == parent_root_id) {
1367                         btrfs_release_path(&path);
1368                         return 1;
1369                 }
1370
1371                 path.slots[0]++;
1372         }
1373 out:
1374         btrfs_release_path(&path);
1375         if (ret < 0)
1376                 return ret;
1377         return has_parent ? 0 : 2;
1378 }
1379
1380 static int process_dir_item(struct btrfs_root *root,
1381                             struct extent_buffer *eb,
1382                             int slot, struct btrfs_key *key,
1383                             struct shared_node *active_node)
1384 {
1385         u32 total;
1386         u32 cur = 0;
1387         u32 len;
1388         u32 name_len;
1389         u32 data_len;
1390         int error;
1391         int nritems = 0;
1392         int filetype;
1393         struct btrfs_dir_item *di;
1394         struct inode_record *rec;
1395         struct cache_tree *root_cache;
1396         struct cache_tree *inode_cache;
1397         struct btrfs_key location;
1398         char namebuf[BTRFS_NAME_LEN];
1399
1400         root_cache = &active_node->root_cache;
1401         inode_cache = &active_node->inode_cache;
1402         rec = active_node->current;
1403         rec->found_dir_item = 1;
1404
1405         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1406         total = btrfs_item_size_nr(eb, slot);
1407         while (cur < total) {
1408                 nritems++;
1409                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1410                 name_len = btrfs_dir_name_len(eb, di);
1411                 data_len = btrfs_dir_data_len(eb, di);
1412                 filetype = btrfs_dir_type(eb, di);
1413
1414                 rec->found_size += name_len;
1415                 if (name_len <= BTRFS_NAME_LEN) {
1416                         len = name_len;
1417                         error = 0;
1418                 } else {
1419                         len = BTRFS_NAME_LEN;
1420                         error = REF_ERR_NAME_TOO_LONG;
1421                 }
1422                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1423
1424                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1425                         add_inode_backref(inode_cache, location.objectid,
1426                                           key->objectid, key->offset, namebuf,
1427                                           len, filetype, key->type, error);
1428                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1429                         add_inode_backref(root_cache, location.objectid,
1430                                           key->objectid, key->offset,
1431                                           namebuf, len, filetype,
1432                                           key->type, error);
1433                 } else {
1434                         fprintf(stderr, "invalid location in dir item %u\n",
1435                                 location.type);
1436                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1437                                           key->objectid, key->offset, namebuf,
1438                                           len, filetype, key->type, error);
1439                 }
1440
1441                 len = sizeof(*di) + name_len + data_len;
1442                 di = (struct btrfs_dir_item *)((char *)di + len);
1443                 cur += len;
1444         }
1445         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1446                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1447
1448         return 0;
1449 }
1450
1451 static int process_inode_ref(struct extent_buffer *eb,
1452                              int slot, struct btrfs_key *key,
1453                              struct shared_node *active_node)
1454 {
1455         u32 total;
1456         u32 cur = 0;
1457         u32 len;
1458         u32 name_len;
1459         u64 index;
1460         int error;
1461         struct cache_tree *inode_cache;
1462         struct btrfs_inode_ref *ref;
1463         char namebuf[BTRFS_NAME_LEN];
1464
1465         inode_cache = &active_node->inode_cache;
1466
1467         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1468         total = btrfs_item_size_nr(eb, slot);
1469         while (cur < total) {
1470                 name_len = btrfs_inode_ref_name_len(eb, ref);
1471                 index = btrfs_inode_ref_index(eb, ref);
1472                 if (name_len <= BTRFS_NAME_LEN) {
1473                         len = name_len;
1474                         error = 0;
1475                 } else {
1476                         len = BTRFS_NAME_LEN;
1477                         error = REF_ERR_NAME_TOO_LONG;
1478                 }
1479                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1480                 add_inode_backref(inode_cache, key->objectid, key->offset,
1481                                   index, namebuf, len, 0, key->type, error);
1482
1483                 len = sizeof(*ref) + name_len;
1484                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1485                 cur += len;
1486         }
1487         return 0;
1488 }
1489
1490 static int process_inode_extref(struct extent_buffer *eb,
1491                                 int slot, struct btrfs_key *key,
1492                                 struct shared_node *active_node)
1493 {
1494         u32 total;
1495         u32 cur = 0;
1496         u32 len;
1497         u32 name_len;
1498         u64 index;
1499         u64 parent;
1500         int error;
1501         struct cache_tree *inode_cache;
1502         struct btrfs_inode_extref *extref;
1503         char namebuf[BTRFS_NAME_LEN];
1504
1505         inode_cache = &active_node->inode_cache;
1506
1507         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1508         total = btrfs_item_size_nr(eb, slot);
1509         while (cur < total) {
1510                 name_len = btrfs_inode_extref_name_len(eb, extref);
1511                 index = btrfs_inode_extref_index(eb, extref);
1512                 parent = btrfs_inode_extref_parent(eb, extref);
1513                 if (name_len <= BTRFS_NAME_LEN) {
1514                         len = name_len;
1515                         error = 0;
1516                 } else {
1517                         len = BTRFS_NAME_LEN;
1518                         error = REF_ERR_NAME_TOO_LONG;
1519                 }
1520                 read_extent_buffer(eb, namebuf,
1521                                    (unsigned long)(extref + 1), len);
1522                 add_inode_backref(inode_cache, key->objectid, parent,
1523                                   index, namebuf, len, 0, key->type, error);
1524
1525                 len = sizeof(*extref) + name_len;
1526                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1527                 cur += len;
1528         }
1529         return 0;
1530
1531 }
1532
1533 static int count_csum_range(struct btrfs_root *root, u64 start,
1534                             u64 len, u64 *found)
1535 {
1536         struct btrfs_key key;
1537         struct btrfs_path path;
1538         struct extent_buffer *leaf;
1539         int ret;
1540         size_t size;
1541         *found = 0;
1542         u64 csum_end;
1543         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1544
1545         btrfs_init_path(&path);
1546
1547         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1548         key.offset = start;
1549         key.type = BTRFS_EXTENT_CSUM_KEY;
1550
1551         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1552                                 &key, &path, 0, 0);
1553         if (ret < 0)
1554                 goto out;
1555         if (ret > 0 && path.slots[0] > 0) {
1556                 leaf = path.nodes[0];
1557                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1558                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1559                     key.type == BTRFS_EXTENT_CSUM_KEY)
1560                         path.slots[0]--;
1561         }
1562
1563         while (len > 0) {
1564                 leaf = path.nodes[0];
1565                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1566                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1567                         if (ret > 0)
1568                                 break;
1569                         else if (ret < 0)
1570                                 goto out;
1571                         leaf = path.nodes[0];
1572                 }
1573
1574                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1575                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1576                     key.type != BTRFS_EXTENT_CSUM_KEY)
1577                         break;
1578
1579                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1580                 if (key.offset >= start + len)
1581                         break;
1582
1583                 if (key.offset > start)
1584                         start = key.offset;
1585
1586                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1587                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1588                 if (csum_end > start) {
1589                         size = min(csum_end - start, len);
1590                         len -= size;
1591                         start += size;
1592                         *found += size;
1593                 }
1594
1595                 path.slots[0]++;
1596         }
1597 out:
1598         btrfs_release_path(&path);
1599         if (ret < 0)
1600                 return ret;
1601         return 0;
1602 }
1603
1604 static int process_file_extent(struct btrfs_root *root,
1605                                 struct extent_buffer *eb,
1606                                 int slot, struct btrfs_key *key,
1607                                 struct shared_node *active_node)
1608 {
1609         struct inode_record *rec;
1610         struct btrfs_file_extent_item *fi;
1611         u64 num_bytes = 0;
1612         u64 disk_bytenr = 0;
1613         u64 extent_offset = 0;
1614         u64 mask = root->sectorsize - 1;
1615         int extent_type;
1616         int ret;
1617
1618         rec = active_node->current;
1619         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1620         rec->found_file_extent = 1;
1621
1622         if (rec->extent_start == (u64)-1) {
1623                 rec->extent_start = key->offset;
1624                 rec->extent_end = key->offset;
1625         }
1626
1627         if (rec->extent_end > key->offset)
1628                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1629         else if (rec->extent_end < key->offset) {
1630                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1631                                            key->offset - rec->extent_end);
1632                 if (ret < 0)
1633                         return ret;
1634         }
1635
1636         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1637         extent_type = btrfs_file_extent_type(eb, fi);
1638
1639         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1640                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1641                 if (num_bytes == 0)
1642                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1643                 rec->found_size += num_bytes;
1644                 num_bytes = (num_bytes + mask) & ~mask;
1645         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1646                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1647                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1648                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1649                 extent_offset = btrfs_file_extent_offset(eb, fi);
1650                 if (num_bytes == 0 || (num_bytes & mask))
1651                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1652                 if (num_bytes + extent_offset >
1653                     btrfs_file_extent_ram_bytes(eb, fi))
1654                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1655                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1656                     (btrfs_file_extent_compression(eb, fi) ||
1657                      btrfs_file_extent_encryption(eb, fi) ||
1658                      btrfs_file_extent_other_encoding(eb, fi)))
1659                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1660                 if (disk_bytenr > 0)
1661                         rec->found_size += num_bytes;
1662         } else {
1663                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1664         }
1665         rec->extent_end = key->offset + num_bytes;
1666
1667         /*
1668          * The data reloc tree will copy full extents into its inode and then
1669          * copy the corresponding csums.  Because the extent it copied could be
1670          * a preallocated extent that hasn't been written to yet there may be no
1671          * csums to copy, ergo we won't have csums for our file extent.  This is
1672          * ok so just don't bother checking csums if the inode belongs to the
1673          * data reloc tree.
1674          */
1675         if (disk_bytenr > 0 &&
1676             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1677                 u64 found;
1678                 if (btrfs_file_extent_compression(eb, fi))
1679                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1680                 else
1681                         disk_bytenr += extent_offset;
1682
1683                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1684                 if (ret < 0)
1685                         return ret;
1686                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1687                         if (found > 0)
1688                                 rec->found_csum_item = 1;
1689                         if (found < num_bytes)
1690                                 rec->some_csum_missing = 1;
1691                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1692                         if (found > 0)
1693                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1694                 }
1695         }
1696         return 0;
1697 }
1698
1699 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1700                             struct walk_control *wc)
1701 {
1702         struct btrfs_key key;
1703         u32 nritems;
1704         int i;
1705         int ret = 0;
1706         struct cache_tree *inode_cache;
1707         struct shared_node *active_node;
1708
1709         if (wc->root_level == wc->active_node &&
1710             btrfs_root_refs(&root->root_item) == 0)
1711                 return 0;
1712
1713         active_node = wc->nodes[wc->active_node];
1714         inode_cache = &active_node->inode_cache;
1715         nritems = btrfs_header_nritems(eb);
1716         for (i = 0; i < nritems; i++) {
1717                 btrfs_item_key_to_cpu(eb, &key, i);
1718
1719                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1720                         continue;
1721                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1722                         continue;
1723
1724                 if (active_node->current == NULL ||
1725                     active_node->current->ino < key.objectid) {
1726                         if (active_node->current) {
1727                                 active_node->current->checked = 1;
1728                                 maybe_free_inode_rec(inode_cache,
1729                                                      active_node->current);
1730                         }
1731                         active_node->current = get_inode_rec(inode_cache,
1732                                                              key.objectid, 1);
1733                         BUG_ON(IS_ERR(active_node->current));
1734                 }
1735                 switch (key.type) {
1736                 case BTRFS_DIR_ITEM_KEY:
1737                 case BTRFS_DIR_INDEX_KEY:
1738                         ret = process_dir_item(root, eb, i, &key, active_node);
1739                         break;
1740                 case BTRFS_INODE_REF_KEY:
1741                         ret = process_inode_ref(eb, i, &key, active_node);
1742                         break;
1743                 case BTRFS_INODE_EXTREF_KEY:
1744                         ret = process_inode_extref(eb, i, &key, active_node);
1745                         break;
1746                 case BTRFS_INODE_ITEM_KEY:
1747                         ret = process_inode_item(eb, i, &key, active_node);
1748                         break;
1749                 case BTRFS_EXTENT_DATA_KEY:
1750                         ret = process_file_extent(root, eb, i, &key,
1751                                                   active_node);
1752                         break;
1753                 default:
1754                         break;
1755                 };
1756         }
1757         return ret;
1758 }
1759
1760 static void reada_walk_down(struct btrfs_root *root,
1761                             struct extent_buffer *node, int slot)
1762 {
1763         u64 bytenr;
1764         u64 ptr_gen;
1765         u32 nritems;
1766         u32 blocksize;
1767         int i;
1768         int level;
1769
1770         level = btrfs_header_level(node);
1771         if (level != 1)
1772                 return;
1773
1774         nritems = btrfs_header_nritems(node);
1775         blocksize = root->nodesize;
1776         for (i = slot; i < nritems; i++) {
1777                 bytenr = btrfs_node_blockptr(node, i);
1778                 ptr_gen = btrfs_node_ptr_generation(node, i);
1779                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1780         }
1781 }
1782
1783 /*
1784  * Check the child node/leaf by the following condition:
1785  * 1. the first item key of the node/leaf should be the same with the one
1786  *    in parent.
1787  * 2. block in parent node should match the child node/leaf.
1788  * 3. generation of parent node and child's header should be consistent.
1789  *
1790  * Or the child node/leaf pointed by the key in parent is not valid.
1791  *
1792  * We hope to check leaf owner too, but since subvol may share leaves,
1793  * which makes leaf owner check not so strong, key check should be
1794  * sufficient enough for that case.
1795  */
1796 static int check_child_node(struct btrfs_root *root,
1797                             struct extent_buffer *parent, int slot,
1798                             struct extent_buffer *child)
1799 {
1800         struct btrfs_key parent_key;
1801         struct btrfs_key child_key;
1802         int ret = 0;
1803
1804         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1805         if (btrfs_header_level(child) == 0)
1806                 btrfs_item_key_to_cpu(child, &child_key, 0);
1807         else
1808                 btrfs_node_key_to_cpu(child, &child_key, 0);
1809
1810         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1811                 ret = -EINVAL;
1812                 fprintf(stderr,
1813                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1814                         parent_key.objectid, parent_key.type, parent_key.offset,
1815                         child_key.objectid, child_key.type, child_key.offset);
1816         }
1817         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1818                 ret = -EINVAL;
1819                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1820                         btrfs_node_blockptr(parent, slot),
1821                         btrfs_header_bytenr(child));
1822         }
1823         if (btrfs_node_ptr_generation(parent, slot) !=
1824             btrfs_header_generation(child)) {
1825                 ret = -EINVAL;
1826                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1827                         btrfs_header_generation(child),
1828                         btrfs_node_ptr_generation(parent, slot));
1829         }
1830         return ret;
1831 }
1832
1833 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1834                           struct walk_control *wc, int *level)
1835 {
1836         enum btrfs_tree_block_status status;
1837         u64 bytenr;
1838         u64 ptr_gen;
1839         struct extent_buffer *next;
1840         struct extent_buffer *cur;
1841         u32 blocksize;
1842         int ret, err = 0;
1843         u64 refs;
1844
1845         WARN_ON(*level < 0);
1846         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1847         ret = btrfs_lookup_extent_info(NULL, root,
1848                                        path->nodes[*level]->start,
1849                                        *level, 1, &refs, NULL);
1850         if (ret < 0) {
1851                 err = ret;
1852                 goto out;
1853         }
1854
1855         if (refs > 1) {
1856                 ret = enter_shared_node(root, path->nodes[*level]->start,
1857                                         refs, wc, *level);
1858                 if (ret > 0) {
1859                         err = ret;
1860                         goto out;
1861                 }
1862         }
1863
1864         while (*level >= 0) {
1865                 WARN_ON(*level < 0);
1866                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1867                 cur = path->nodes[*level];
1868
1869                 if (btrfs_header_level(cur) != *level)
1870                         WARN_ON(1);
1871
1872                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1873                         break;
1874                 if (*level == 0) {
1875                         ret = process_one_leaf(root, cur, wc);
1876                         if (ret < 0)
1877                                 err = ret;
1878                         break;
1879                 }
1880                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1881                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1882                 blocksize = root->nodesize;
1883                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1884                                                1, &refs, NULL);
1885                 if (ret < 0)
1886                         refs = 0;
1887
1888                 if (refs > 1) {
1889                         ret = enter_shared_node(root, bytenr, refs,
1890                                                 wc, *level - 1);
1891                         if (ret > 0) {
1892                                 path->slots[*level]++;
1893                                 continue;
1894                         }
1895                 }
1896
1897                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1898                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1899                         free_extent_buffer(next);
1900                         reada_walk_down(root, cur, path->slots[*level]);
1901                         next = read_tree_block(root, bytenr, blocksize,
1902                                                ptr_gen);
1903                         if (!extent_buffer_uptodate(next)) {
1904                                 struct btrfs_key node_key;
1905
1906                                 btrfs_node_key_to_cpu(path->nodes[*level],
1907                                                       &node_key,
1908                                                       path->slots[*level]);
1909                                 btrfs_add_corrupt_extent_record(root->fs_info,
1910                                                 &node_key,
1911                                                 path->nodes[*level]->start,
1912                                                 root->nodesize, *level);
1913                                 err = -EIO;
1914                                 goto out;
1915                         }
1916                 }
1917
1918                 ret = check_child_node(root, cur, path->slots[*level], next);
1919                 if (ret) {
1920                         err = ret;
1921                         goto out;
1922                 }
1923
1924                 if (btrfs_is_leaf(next))
1925                         status = btrfs_check_leaf(root, NULL, next);
1926                 else
1927                         status = btrfs_check_node(root, NULL, next);
1928                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1929                         free_extent_buffer(next);
1930                         err = -EIO;
1931                         goto out;
1932                 }
1933
1934                 *level = *level - 1;
1935                 free_extent_buffer(path->nodes[*level]);
1936                 path->nodes[*level] = next;
1937                 path->slots[*level] = 0;
1938         }
1939 out:
1940         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1941         return err;
1942 }
1943
1944 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1945                         struct walk_control *wc, int *level)
1946 {
1947         int i;
1948         struct extent_buffer *leaf;
1949
1950         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1951                 leaf = path->nodes[i];
1952                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1953                         path->slots[i]++;
1954                         *level = i;
1955                         return 0;
1956                 } else {
1957                         free_extent_buffer(path->nodes[*level]);
1958                         path->nodes[*level] = NULL;
1959                         BUG_ON(*level > wc->active_node);
1960                         if (*level == wc->active_node)
1961                                 leave_shared_node(root, wc, *level);
1962                         *level = i + 1;
1963                 }
1964         }
1965         return 1;
1966 }
1967
1968 static int check_root_dir(struct inode_record *rec)
1969 {
1970         struct inode_backref *backref;
1971         int ret = -1;
1972
1973         if (!rec->found_inode_item || rec->errors)
1974                 goto out;
1975         if (rec->nlink != 1 || rec->found_link != 0)
1976                 goto out;
1977         if (list_empty(&rec->backrefs))
1978                 goto out;
1979         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1980         if (!backref->found_inode_ref)
1981                 goto out;
1982         if (backref->index != 0 || backref->namelen != 2 ||
1983             memcmp(backref->name, "..", 2))
1984                 goto out;
1985         if (backref->found_dir_index || backref->found_dir_item)
1986                 goto out;
1987         ret = 0;
1988 out:
1989         return ret;
1990 }
1991
1992 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1993                               struct btrfs_root *root, struct btrfs_path *path,
1994                               struct inode_record *rec)
1995 {
1996         struct btrfs_inode_item *ei;
1997         struct btrfs_key key;
1998         int ret;
1999
2000         key.objectid = rec->ino;
2001         key.type = BTRFS_INODE_ITEM_KEY;
2002         key.offset = (u64)-1;
2003
2004         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2005         if (ret < 0)
2006                 goto out;
2007         if (ret) {
2008                 if (!path->slots[0]) {
2009                         ret = -ENOENT;
2010                         goto out;
2011                 }
2012                 path->slots[0]--;
2013                 ret = 0;
2014         }
2015         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2016         if (key.objectid != rec->ino) {
2017                 ret = -ENOENT;
2018                 goto out;
2019         }
2020
2021         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2022                             struct btrfs_inode_item);
2023         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2024         btrfs_mark_buffer_dirty(path->nodes[0]);
2025         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2026         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2027                root->root_key.objectid);
2028 out:
2029         btrfs_release_path(path);
2030         return ret;
2031 }
2032
2033 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2034                                     struct btrfs_root *root,
2035                                     struct btrfs_path *path,
2036                                     struct inode_record *rec)
2037 {
2038         int ret;
2039
2040         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2041         btrfs_release_path(path);
2042         if (!ret)
2043                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2044         return ret;
2045 }
2046
2047 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2048                                struct btrfs_root *root,
2049                                struct btrfs_path *path,
2050                                struct inode_record *rec)
2051 {
2052         struct btrfs_inode_item *ei;
2053         struct btrfs_key key;
2054         int ret = 0;
2055
2056         key.objectid = rec->ino;
2057         key.type = BTRFS_INODE_ITEM_KEY;
2058         key.offset = 0;
2059
2060         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2061         if (ret) {
2062                 if (ret > 0)
2063                         ret = -ENOENT;
2064                 goto out;
2065         }
2066
2067         /* Since ret == 0, no need to check anything */
2068         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2069                             struct btrfs_inode_item);
2070         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2071         btrfs_mark_buffer_dirty(path->nodes[0]);
2072         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2073         printf("reset nbytes for ino %llu root %llu\n",
2074                rec->ino, root->root_key.objectid);
2075 out:
2076         btrfs_release_path(path);
2077         return ret;
2078 }
2079
2080 static int add_missing_dir_index(struct btrfs_root *root,
2081                                  struct cache_tree *inode_cache,
2082                                  struct inode_record *rec,
2083                                  struct inode_backref *backref)
2084 {
2085         struct btrfs_path *path;
2086         struct btrfs_trans_handle *trans;
2087         struct btrfs_dir_item *dir_item;
2088         struct extent_buffer *leaf;
2089         struct btrfs_key key;
2090         struct btrfs_disk_key disk_key;
2091         struct inode_record *dir_rec;
2092         unsigned long name_ptr;
2093         u32 data_size = sizeof(*dir_item) + backref->namelen;
2094         int ret;
2095
2096         path = btrfs_alloc_path();
2097         if (!path)
2098                 return -ENOMEM;
2099
2100         trans = btrfs_start_transaction(root, 1);
2101         if (IS_ERR(trans)) {
2102                 btrfs_free_path(path);
2103                 return PTR_ERR(trans);
2104         }
2105
2106         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2107                 (unsigned long long)rec->ino);
2108         key.objectid = backref->dir;
2109         key.type = BTRFS_DIR_INDEX_KEY;
2110         key.offset = backref->index;
2111
2112         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2113         BUG_ON(ret);
2114
2115         leaf = path->nodes[0];
2116         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2117
2118         disk_key.objectid = cpu_to_le64(rec->ino);
2119         disk_key.type = BTRFS_INODE_ITEM_KEY;
2120         disk_key.offset = 0;
2121
2122         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2123         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2124         btrfs_set_dir_data_len(leaf, dir_item, 0);
2125         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2126         name_ptr = (unsigned long)(dir_item + 1);
2127         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2128         btrfs_mark_buffer_dirty(leaf);
2129         btrfs_free_path(path);
2130         btrfs_commit_transaction(trans, root);
2131
2132         backref->found_dir_index = 1;
2133         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2134         BUG_ON(IS_ERR(dir_rec));
2135         if (!dir_rec)
2136                 return 0;
2137         dir_rec->found_size += backref->namelen;
2138         if (dir_rec->found_size == dir_rec->isize &&
2139             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2140                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2141         if (dir_rec->found_size != dir_rec->isize)
2142                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2143
2144         return 0;
2145 }
2146
2147 static int delete_dir_index(struct btrfs_root *root,
2148                             struct cache_tree *inode_cache,
2149                             struct inode_record *rec,
2150                             struct inode_backref *backref)
2151 {
2152         struct btrfs_trans_handle *trans;
2153         struct btrfs_dir_item *di;
2154         struct btrfs_path *path;
2155         int ret = 0;
2156
2157         path = btrfs_alloc_path();
2158         if (!path)
2159                 return -ENOMEM;
2160
2161         trans = btrfs_start_transaction(root, 1);
2162         if (IS_ERR(trans)) {
2163                 btrfs_free_path(path);
2164                 return PTR_ERR(trans);
2165         }
2166
2167
2168         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2169                 (unsigned long long)backref->dir,
2170                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2171                 (unsigned long long)root->objectid);
2172
2173         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2174                                     backref->name, backref->namelen,
2175                                     backref->index, -1);
2176         if (IS_ERR(di)) {
2177                 ret = PTR_ERR(di);
2178                 btrfs_free_path(path);
2179                 btrfs_commit_transaction(trans, root);
2180                 if (ret == -ENOENT)
2181                         return 0;
2182                 return ret;
2183         }
2184
2185         if (!di)
2186                 ret = btrfs_del_item(trans, root, path);
2187         else
2188                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2189         BUG_ON(ret);
2190         btrfs_free_path(path);
2191         btrfs_commit_transaction(trans, root);
2192         return ret;
2193 }
2194
2195 static int create_inode_item(struct btrfs_root *root,
2196                              struct inode_record *rec,
2197                              struct inode_backref *backref, int root_dir)
2198 {
2199         struct btrfs_trans_handle *trans;
2200         struct btrfs_inode_item inode_item;
2201         time_t now = time(NULL);
2202         int ret;
2203
2204         trans = btrfs_start_transaction(root, 1);
2205         if (IS_ERR(trans)) {
2206                 ret = PTR_ERR(trans);
2207                 return ret;
2208         }
2209
2210         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2211                 "be incomplete, please check permissions and content after "
2212                 "the fsck completes.\n", (unsigned long long)root->objectid,
2213                 (unsigned long long)rec->ino);
2214
2215         memset(&inode_item, 0, sizeof(inode_item));
2216         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2217         if (root_dir)
2218                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2219         else
2220                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2221         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2222         if (rec->found_dir_item) {
2223                 if (rec->found_file_extent)
2224                         fprintf(stderr, "root %llu inode %llu has both a dir "
2225                                 "item and extents, unsure if it is a dir or a "
2226                                 "regular file so setting it as a directory\n",
2227                                 (unsigned long long)root->objectid,
2228                                 (unsigned long long)rec->ino);
2229                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2230                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2231         } else if (!rec->found_dir_item) {
2232                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2233                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2234         }
2235         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2236         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2237         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2238         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2239         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2240         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2241         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2242         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2243
2244         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2245         BUG_ON(ret);
2246         btrfs_commit_transaction(trans, root);
2247         return 0;
2248 }
2249
2250 static int repair_inode_backrefs(struct btrfs_root *root,
2251                                  struct inode_record *rec,
2252                                  struct cache_tree *inode_cache,
2253                                  int delete)
2254 {
2255         struct inode_backref *tmp, *backref;
2256         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2257         int ret = 0;
2258         int repaired = 0;
2259
2260         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2261                 if (!delete && rec->ino == root_dirid) {
2262                         if (!rec->found_inode_item) {
2263                                 ret = create_inode_item(root, rec, backref, 1);
2264                                 if (ret)
2265                                         break;
2266                                 repaired++;
2267                         }
2268                 }
2269
2270                 /* Index 0 for root dir's are special, don't mess with it */
2271                 if (rec->ino == root_dirid && backref->index == 0)
2272                         continue;
2273
2274                 if (delete &&
2275                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2276                      (backref->found_dir_index && backref->found_inode_ref &&
2277                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2278                         ret = delete_dir_index(root, inode_cache, rec, backref);
2279                         if (ret)
2280                                 break;
2281                         repaired++;
2282                         list_del(&backref->list);
2283                         free(backref);
2284                 }
2285
2286                 if (!delete && !backref->found_dir_index &&
2287                     backref->found_dir_item && backref->found_inode_ref) {
2288                         ret = add_missing_dir_index(root, inode_cache, rec,
2289                                                     backref);
2290                         if (ret)
2291                                 break;
2292                         repaired++;
2293                         if (backref->found_dir_item &&
2294                             backref->found_dir_index &&
2295                             backref->found_dir_index) {
2296                                 if (!backref->errors &&
2297                                     backref->found_inode_ref) {
2298                                         list_del(&backref->list);
2299                                         free(backref);
2300                                 }
2301                         }
2302                 }
2303
2304                 if (!delete && (!backref->found_dir_index &&
2305                                 !backref->found_dir_item &&
2306                                 backref->found_inode_ref)) {
2307                         struct btrfs_trans_handle *trans;
2308                         struct btrfs_key location;
2309
2310                         ret = check_dir_conflict(root, backref->name,
2311                                                  backref->namelen,
2312                                                  backref->dir,
2313                                                  backref->index);
2314                         if (ret) {
2315                                 /*
2316                                  * let nlink fixing routine to handle it,
2317                                  * which can do it better.
2318                                  */
2319                                 ret = 0;
2320                                 break;
2321                         }
2322                         location.objectid = rec->ino;
2323                         location.type = BTRFS_INODE_ITEM_KEY;
2324                         location.offset = 0;
2325
2326                         trans = btrfs_start_transaction(root, 1);
2327                         if (IS_ERR(trans)) {
2328                                 ret = PTR_ERR(trans);
2329                                 break;
2330                         }
2331                         fprintf(stderr, "adding missing dir index/item pair "
2332                                 "for inode %llu\n",
2333                                 (unsigned long long)rec->ino);
2334                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2335                                                     backref->namelen,
2336                                                     backref->dir, &location,
2337                                                     imode_to_type(rec->imode),
2338                                                     backref->index);
2339                         BUG_ON(ret);
2340                         btrfs_commit_transaction(trans, root);
2341                         repaired++;
2342                 }
2343
2344                 if (!delete && (backref->found_inode_ref &&
2345                                 backref->found_dir_index &&
2346                                 backref->found_dir_item &&
2347                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2348                                 !rec->found_inode_item)) {
2349                         ret = create_inode_item(root, rec, backref, 0);
2350                         if (ret)
2351                                 break;
2352                         repaired++;
2353                 }
2354
2355         }
2356         return ret ? ret : repaired;
2357 }
2358
2359 /*
2360  * To determine the file type for nlink/inode_item repair
2361  *
2362  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2363  * Return -ENOENT if file type is not found.
2364  */
2365 static int find_file_type(struct inode_record *rec, u8 *type)
2366 {
2367         struct inode_backref *backref;
2368
2369         /* For inode item recovered case */
2370         if (rec->found_inode_item) {
2371                 *type = imode_to_type(rec->imode);
2372                 return 0;
2373         }
2374
2375         list_for_each_entry(backref, &rec->backrefs, list) {
2376                 if (backref->found_dir_index || backref->found_dir_item) {
2377                         *type = backref->filetype;
2378                         return 0;
2379                 }
2380         }
2381         return -ENOENT;
2382 }
2383
2384 /*
2385  * To determine the file name for nlink repair
2386  *
2387  * Return 0 if file name is found, set name and namelen.
2388  * Return -ENOENT if file name is not found.
2389  */
2390 static int find_file_name(struct inode_record *rec,
2391                           char *name, int *namelen)
2392 {
2393         struct inode_backref *backref;
2394
2395         list_for_each_entry(backref, &rec->backrefs, list) {
2396                 if (backref->found_dir_index || backref->found_dir_item ||
2397                     backref->found_inode_ref) {
2398                         memcpy(name, backref->name, backref->namelen);
2399                         *namelen = backref->namelen;
2400                         return 0;
2401                 }
2402         }
2403         return -ENOENT;
2404 }
2405
2406 /* Reset the nlink of the inode to the correct one */
2407 static int reset_nlink(struct btrfs_trans_handle *trans,
2408                        struct btrfs_root *root,
2409                        struct btrfs_path *path,
2410                        struct inode_record *rec)
2411 {
2412         struct inode_backref *backref;
2413         struct inode_backref *tmp;
2414         struct btrfs_key key;
2415         struct btrfs_inode_item *inode_item;
2416         int ret = 0;
2417
2418         /* We don't believe this either, reset it and iterate backref */
2419         rec->found_link = 0;
2420
2421         /* Remove all backref including the valid ones */
2422         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2423                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2424                                    backref->index, backref->name,
2425                                    backref->namelen, 0);
2426                 if (ret < 0)
2427                         goto out;
2428
2429                 /* remove invalid backref, so it won't be added back */
2430                 if (!(backref->found_dir_index &&
2431                       backref->found_dir_item &&
2432                       backref->found_inode_ref)) {
2433                         list_del(&backref->list);
2434                         free(backref);
2435                 } else {
2436                         rec->found_link++;
2437                 }
2438         }
2439
2440         /* Set nlink to 0 */
2441         key.objectid = rec->ino;
2442         key.type = BTRFS_INODE_ITEM_KEY;
2443         key.offset = 0;
2444         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2445         if (ret < 0)
2446                 goto out;
2447         if (ret > 0) {
2448                 ret = -ENOENT;
2449                 goto out;
2450         }
2451         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2452                                     struct btrfs_inode_item);
2453         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2454         btrfs_mark_buffer_dirty(path->nodes[0]);
2455         btrfs_release_path(path);
2456
2457         /*
2458          * Add back valid inode_ref/dir_item/dir_index,
2459          * add_link() will handle the nlink inc, so new nlink must be correct
2460          */
2461         list_for_each_entry(backref, &rec->backrefs, list) {
2462                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2463                                      backref->name, backref->namelen,
2464                                      backref->filetype, &backref->index, 1);
2465                 if (ret < 0)
2466                         goto out;
2467         }
2468 out:
2469         btrfs_release_path(path);
2470         return ret;
2471 }
2472
2473 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2474                                struct btrfs_root *root,
2475                                struct btrfs_path *path,
2476                                struct inode_record *rec)
2477 {
2478         char *dir_name = "lost+found";
2479         char namebuf[BTRFS_NAME_LEN] = {0};
2480         u64 lost_found_ino;
2481         u32 mode = 0700;
2482         u8 type = 0;
2483         int namelen = 0;
2484         int name_recovered = 0;
2485         int type_recovered = 0;
2486         int ret = 0;
2487
2488         /*
2489          * Get file name and type first before these invalid inode ref
2490          * are deleted by remove_all_invalid_backref()
2491          */
2492         name_recovered = !find_file_name(rec, namebuf, &namelen);
2493         type_recovered = !find_file_type(rec, &type);
2494
2495         if (!name_recovered) {
2496                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2497                        rec->ino, rec->ino);
2498                 namelen = count_digits(rec->ino);
2499                 sprintf(namebuf, "%llu", rec->ino);
2500                 name_recovered = 1;
2501         }
2502         if (!type_recovered) {
2503                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2504                        rec->ino);
2505                 type = BTRFS_FT_REG_FILE;
2506                 type_recovered = 1;
2507         }
2508
2509         ret = reset_nlink(trans, root, path, rec);
2510         if (ret < 0) {
2511                 fprintf(stderr,
2512                         "Failed to reset nlink for inode %llu: %s\n",
2513                         rec->ino, strerror(-ret));
2514                 goto out;
2515         }
2516
2517         if (rec->found_link == 0) {
2518                 lost_found_ino = root->highest_inode;
2519                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2520                         ret = -EOVERFLOW;
2521                         goto out;
2522                 }
2523                 lost_found_ino++;
2524                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2525                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2526                                   mode);
2527                 if (ret < 0) {
2528                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2529                                 dir_name, strerror(-ret));
2530                         goto out;
2531                 }
2532                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2533                                      namebuf, namelen, type, NULL, 1);
2534                 /*
2535                  * Add ".INO" suffix several times to handle case where
2536                  * "FILENAME.INO" is already taken by another file.
2537                  */
2538                 while (ret == -EEXIST) {
2539                         /*
2540                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2541                          */
2542                         if (namelen + count_digits(rec->ino) + 1 >
2543                             BTRFS_NAME_LEN) {
2544                                 ret = -EFBIG;
2545                                 goto out;
2546                         }
2547                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2548                                  ".%llu", rec->ino);
2549                         namelen += count_digits(rec->ino) + 1;
2550                         ret = btrfs_add_link(trans, root, rec->ino,
2551                                              lost_found_ino, namebuf,
2552                                              namelen, type, NULL, 1);
2553                 }
2554                 if (ret < 0) {
2555                         fprintf(stderr,
2556                                 "Failed to link the inode %llu to %s dir: %s\n",
2557                                 rec->ino, dir_name, strerror(-ret));
2558                         goto out;
2559                 }
2560                 /*
2561                  * Just increase the found_link, don't actually add the
2562                  * backref. This will make things easier and this inode
2563                  * record will be freed after the repair is done.
2564                  * So fsck will not report problem about this inode.
2565                  */
2566                 rec->found_link++;
2567                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2568                        namelen, namebuf, dir_name);
2569         }
2570         printf("Fixed the nlink of inode %llu\n", rec->ino);
2571 out:
2572         /*
2573          * Clear the flag anyway, or we will loop forever for the same inode
2574          * as it will not be removed from the bad inode list and the dead loop
2575          * happens.
2576          */
2577         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2578         btrfs_release_path(path);
2579         return ret;
2580 }
2581
2582 /*
2583  * Check if there is any normal(reg or prealloc) file extent for given
2584  * ino.
2585  * This is used to determine the file type when neither its dir_index/item or
2586  * inode_item exists.
2587  *
2588  * This will *NOT* report error, if any error happens, just consider it does
2589  * not have any normal file extent.
2590  */
2591 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2592 {
2593         struct btrfs_path *path;
2594         struct btrfs_key key;
2595         struct btrfs_key found_key;
2596         struct btrfs_file_extent_item *fi;
2597         u8 type;
2598         int ret = 0;
2599
2600         path = btrfs_alloc_path();
2601         if (!path)
2602                 goto out;
2603         key.objectid = ino;
2604         key.type = BTRFS_EXTENT_DATA_KEY;
2605         key.offset = 0;
2606
2607         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2608         if (ret < 0) {
2609                 ret = 0;
2610                 goto out;
2611         }
2612         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2613                 ret = btrfs_next_leaf(root, path);
2614                 if (ret) {
2615                         ret = 0;
2616                         goto out;
2617                 }
2618         }
2619         while (1) {
2620                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2621                                       path->slots[0]);
2622                 if (found_key.objectid != ino ||
2623                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2624                         break;
2625                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2626                                     struct btrfs_file_extent_item);
2627                 type = btrfs_file_extent_type(path->nodes[0], fi);
2628                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2629                         ret = 1;
2630                         goto out;
2631                 }
2632         }
2633 out:
2634         btrfs_free_path(path);
2635         return ret;
2636 }
2637
2638 static u32 btrfs_type_to_imode(u8 type)
2639 {
2640         static u32 imode_by_btrfs_type[] = {
2641                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2642                 [BTRFS_FT_DIR]          = S_IFDIR,
2643                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2644                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2645                 [BTRFS_FT_FIFO]         = S_IFIFO,
2646                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2647                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2648         };
2649
2650         return imode_by_btrfs_type[(type)];
2651 }
2652
2653 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2654                                 struct btrfs_root *root,
2655                                 struct btrfs_path *path,
2656                                 struct inode_record *rec)
2657 {
2658         u8 filetype;
2659         u32 mode = 0700;
2660         int type_recovered = 0;
2661         int ret = 0;
2662
2663         printf("Trying to rebuild inode:%llu\n", rec->ino);
2664
2665         type_recovered = !find_file_type(rec, &filetype);
2666
2667         /*
2668          * Try to determine inode type if type not found.
2669          *
2670          * For found regular file extent, it must be FILE.
2671          * For found dir_item/index, it must be DIR.
2672          *
2673          * For undetermined one, use FILE as fallback.
2674          *
2675          * TODO:
2676          * 1. If found backref(inode_index/item is already handled) to it,
2677          *    it must be DIR.
2678          *    Need new inode-inode ref structure to allow search for that.
2679          */
2680         if (!type_recovered) {
2681                 if (rec->found_file_extent &&
2682                     find_normal_file_extent(root, rec->ino)) {
2683                         type_recovered = 1;
2684                         filetype = BTRFS_FT_REG_FILE;
2685                 } else if (rec->found_dir_item) {
2686                         type_recovered = 1;
2687                         filetype = BTRFS_FT_DIR;
2688                 } else if (!list_empty(&rec->orphan_extents)) {
2689                         type_recovered = 1;
2690                         filetype = BTRFS_FT_REG_FILE;
2691                 } else{
2692                         printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2693                                rec->ino);
2694                         type_recovered = 1;
2695                         filetype = BTRFS_FT_REG_FILE;
2696                 }
2697         }
2698
2699         ret = btrfs_new_inode(trans, root, rec->ino,
2700                               mode | btrfs_type_to_imode(filetype));
2701         if (ret < 0)
2702                 goto out;
2703
2704         /*
2705          * Here inode rebuild is done, we only rebuild the inode item,
2706          * don't repair the nlink(like move to lost+found).
2707          * That is the job of nlink repair.
2708          *
2709          * We just fill the record and return
2710          */
2711         rec->found_dir_item = 1;
2712         rec->imode = mode | btrfs_type_to_imode(filetype);
2713         rec->nlink = 0;
2714         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2715         /* Ensure the inode_nlinks repair function will be called */
2716         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2717 out:
2718         return ret;
2719 }
2720
2721 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2722                                       struct btrfs_root *root,
2723                                       struct btrfs_path *path,
2724                                       struct inode_record *rec)
2725 {
2726         struct orphan_data_extent *orphan;
2727         struct orphan_data_extent *tmp;
2728         int ret = 0;
2729
2730         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2731                 /*
2732                  * Check for conflicting file extents
2733                  *
2734                  * Here we don't know whether the extents is compressed or not,
2735                  * so we can only assume it not compressed nor data offset,
2736                  * and use its disk_len as extent length.
2737                  */
2738                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2739                                        orphan->offset, orphan->disk_len, 0);
2740                 btrfs_release_path(path);
2741                 if (ret < 0)
2742                         goto out;
2743                 if (!ret) {
2744                         fprintf(stderr,
2745                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2746                                 orphan->disk_bytenr, orphan->disk_len);
2747                         ret = btrfs_free_extent(trans,
2748                                         root->fs_info->extent_root,
2749                                         orphan->disk_bytenr, orphan->disk_len,
2750                                         0, root->objectid, orphan->objectid,
2751                                         orphan->offset);
2752                         if (ret < 0)
2753                                 goto out;
2754                 }
2755                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2756                                 orphan->offset, orphan->disk_bytenr,
2757                                 orphan->disk_len, orphan->disk_len);
2758                 if (ret < 0)
2759                         goto out;
2760
2761                 /* Update file size info */
2762                 rec->found_size += orphan->disk_len;
2763                 if (rec->found_size == rec->nbytes)
2764                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2765
2766                 /* Update the file extent hole info too */
2767                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2768                                            orphan->disk_len);
2769                 if (ret < 0)
2770                         goto out;
2771                 if (RB_EMPTY_ROOT(&rec->holes))
2772                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2773
2774                 list_del(&orphan->list);
2775                 free(orphan);
2776         }
2777         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2778 out:
2779         return ret;
2780 }
2781
2782 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2783                                         struct btrfs_root *root,
2784                                         struct btrfs_path *path,
2785                                         struct inode_record *rec)
2786 {
2787         struct rb_node *node;
2788         struct file_extent_hole *hole;
2789         int found = 0;
2790         int ret = 0;
2791
2792         node = rb_first(&rec->holes);
2793
2794         while (node) {
2795                 found = 1;
2796                 hole = rb_entry(node, struct file_extent_hole, node);
2797                 ret = btrfs_punch_hole(trans, root, rec->ino,
2798                                        hole->start, hole->len);
2799                 if (ret < 0)
2800                         goto out;
2801                 ret = del_file_extent_hole(&rec->holes, hole->start,
2802                                            hole->len);
2803                 if (ret < 0)
2804                         goto out;
2805                 if (RB_EMPTY_ROOT(&rec->holes))
2806                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2807                 node = rb_first(&rec->holes);
2808         }
2809         /* special case for a file losing all its file extent */
2810         if (!found) {
2811                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2812                                        round_up(rec->isize, root->sectorsize));
2813                 if (ret < 0)
2814                         goto out;
2815         }
2816         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2817                rec->ino, root->objectid);
2818 out:
2819         return ret;
2820 }
2821
2822 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2823 {
2824         struct btrfs_trans_handle *trans;
2825         struct btrfs_path *path;
2826         int ret = 0;
2827
2828         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2829                              I_ERR_NO_ORPHAN_ITEM |
2830                              I_ERR_LINK_COUNT_WRONG |
2831                              I_ERR_NO_INODE_ITEM |
2832                              I_ERR_FILE_EXTENT_ORPHAN |
2833                              I_ERR_FILE_EXTENT_DISCOUNT|
2834                              I_ERR_FILE_NBYTES_WRONG)))
2835                 return rec->errors;
2836
2837         path = btrfs_alloc_path();
2838         if (!path)
2839                 return -ENOMEM;
2840
2841         /*
2842          * For nlink repair, it may create a dir and add link, so
2843          * 2 for parent(256)'s dir_index and dir_item
2844          * 2 for lost+found dir's inode_item and inode_ref
2845          * 1 for the new inode_ref of the file
2846          * 2 for lost+found dir's dir_index and dir_item for the file
2847          */
2848         trans = btrfs_start_transaction(root, 7);
2849         if (IS_ERR(trans)) {
2850                 btrfs_free_path(path);
2851                 return PTR_ERR(trans);
2852         }
2853
2854         if (rec->errors & I_ERR_NO_INODE_ITEM)
2855                 ret = repair_inode_no_item(trans, root, path, rec);
2856         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2857                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2858         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2859                 ret = repair_inode_discount_extent(trans, root, path, rec);
2860         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2861                 ret = repair_inode_isize(trans, root, path, rec);
2862         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2863                 ret = repair_inode_orphan_item(trans, root, path, rec);
2864         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2865                 ret = repair_inode_nlinks(trans, root, path, rec);
2866         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2867                 ret = repair_inode_nbytes(trans, root, path, rec);
2868         btrfs_commit_transaction(trans, root);
2869         btrfs_free_path(path);
2870         return ret;
2871 }
2872
2873 static int check_inode_recs(struct btrfs_root *root,
2874                             struct cache_tree *inode_cache)
2875 {
2876         struct cache_extent *cache;
2877         struct ptr_node *node;
2878         struct inode_record *rec;
2879         struct inode_backref *backref;
2880         int stage = 0;
2881         int ret = 0;
2882         int err = 0;
2883         u64 error = 0;
2884         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2885
2886         if (btrfs_root_refs(&root->root_item) == 0) {
2887                 if (!cache_tree_empty(inode_cache))
2888                         fprintf(stderr, "warning line %d\n", __LINE__);
2889                 return 0;
2890         }
2891
2892         /*
2893          * We need to record the highest inode number for later 'lost+found'
2894          * dir creation.
2895          * We must select a ino not used/refered by any existing inode, or
2896          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2897          * this may cause 'lost+found' dir has wrong nlinks.
2898          */
2899         cache = last_cache_extent(inode_cache);
2900         if (cache) {
2901                 node = container_of(cache, struct ptr_node, cache);
2902                 rec = node->data;
2903                 if (rec->ino > root->highest_inode)
2904                         root->highest_inode = rec->ino;
2905         }
2906
2907         /*
2908          * We need to repair backrefs first because we could change some of the
2909          * errors in the inode recs.
2910          *
2911          * We also need to go through and delete invalid backrefs first and then
2912          * add the correct ones second.  We do this because we may get EEXIST
2913          * when adding back the correct index because we hadn't yet deleted the
2914          * invalid index.
2915          *
2916          * For example, if we were missing a dir index then the directories
2917          * isize would be wrong, so if we fixed the isize to what we thought it
2918          * would be and then fixed the backref we'd still have a invalid fs, so
2919          * we need to add back the dir index and then check to see if the isize
2920          * is still wrong.
2921          */
2922         while (stage < 3) {
2923                 stage++;
2924                 if (stage == 3 && !err)
2925                         break;
2926
2927                 cache = search_cache_extent(inode_cache, 0);
2928                 while (repair && cache) {
2929                         node = container_of(cache, struct ptr_node, cache);
2930                         rec = node->data;
2931                         cache = next_cache_extent(cache);
2932
2933                         /* Need to free everything up and rescan */
2934                         if (stage == 3) {
2935                                 remove_cache_extent(inode_cache, &node->cache);
2936                                 free(node);
2937                                 free_inode_rec(rec);
2938                                 continue;
2939                         }
2940
2941                         if (list_empty(&rec->backrefs))
2942                                 continue;
2943
2944                         ret = repair_inode_backrefs(root, rec, inode_cache,
2945                                                     stage == 1);
2946                         if (ret < 0) {
2947                                 err = ret;
2948                                 stage = 2;
2949                                 break;
2950                         } if (ret > 0) {
2951                                 err = -EAGAIN;
2952                         }
2953                 }
2954         }
2955         if (err)
2956                 return err;
2957
2958         rec = get_inode_rec(inode_cache, root_dirid, 0);
2959         BUG_ON(IS_ERR(rec));
2960         if (rec) {
2961                 ret = check_root_dir(rec);
2962                 if (ret) {
2963                         fprintf(stderr, "root %llu root dir %llu error\n",
2964                                 (unsigned long long)root->root_key.objectid,
2965                                 (unsigned long long)root_dirid);
2966                         print_inode_error(root, rec);
2967                         error++;
2968                 }
2969         } else {
2970                 if (repair) {
2971                         struct btrfs_trans_handle *trans;
2972
2973                         trans = btrfs_start_transaction(root, 1);
2974                         if (IS_ERR(trans)) {
2975                                 err = PTR_ERR(trans);
2976                                 return err;
2977                         }
2978
2979                         fprintf(stderr,
2980                                 "root %llu missing its root dir, recreating\n",
2981                                 (unsigned long long)root->objectid);
2982
2983                         ret = btrfs_make_root_dir(trans, root, root_dirid);
2984                         BUG_ON(ret);
2985
2986                         btrfs_commit_transaction(trans, root);
2987                         return -EAGAIN;
2988                 }
2989
2990                 fprintf(stderr, "root %llu root dir %llu not found\n",
2991                         (unsigned long long)root->root_key.objectid,
2992                         (unsigned long long)root_dirid);
2993         }
2994
2995         while (1) {
2996                 cache = search_cache_extent(inode_cache, 0);
2997                 if (!cache)
2998                         break;
2999                 node = container_of(cache, struct ptr_node, cache);
3000                 rec = node->data;
3001                 remove_cache_extent(inode_cache, &node->cache);
3002                 free(node);
3003                 if (rec->ino == root_dirid ||
3004                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3005                         free_inode_rec(rec);
3006                         continue;
3007                 }
3008
3009                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3010                         ret = check_orphan_item(root, rec->ino);
3011                         if (ret == 0)
3012                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3013                         if (can_free_inode_rec(rec)) {
3014                                 free_inode_rec(rec);
3015                                 continue;
3016                         }
3017                 }
3018
3019                 if (!rec->found_inode_item)
3020                         rec->errors |= I_ERR_NO_INODE_ITEM;
3021                 if (rec->found_link != rec->nlink)
3022                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3023                 if (repair) {
3024                         ret = try_repair_inode(root, rec);
3025                         if (ret == 0 && can_free_inode_rec(rec)) {
3026                                 free_inode_rec(rec);
3027                                 continue;
3028                         }
3029                         ret = 0;
3030                 }
3031
3032                 if (!(repair && ret == 0))
3033                         error++;
3034                 print_inode_error(root, rec);
3035                 list_for_each_entry(backref, &rec->backrefs, list) {
3036                         if (!backref->found_dir_item)
3037                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3038                         if (!backref->found_dir_index)
3039                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3040                         if (!backref->found_inode_ref)
3041                                 backref->errors |= REF_ERR_NO_INODE_REF;
3042                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3043                                 " namelen %u name %s filetype %d errors %x",
3044                                 (unsigned long long)backref->dir,
3045                                 (unsigned long long)backref->index,
3046                                 backref->namelen, backref->name,
3047                                 backref->filetype, backref->errors);
3048                         print_ref_error(backref->errors);
3049                 }
3050                 free_inode_rec(rec);
3051         }
3052         return (error > 0) ? -1 : 0;
3053 }
3054
3055 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3056                                         u64 objectid)
3057 {
3058         struct cache_extent *cache;
3059         struct root_record *rec = NULL;
3060         int ret;
3061
3062         cache = lookup_cache_extent(root_cache, objectid, 1);
3063         if (cache) {
3064                 rec = container_of(cache, struct root_record, cache);
3065         } else {
3066                 rec = calloc(1, sizeof(*rec));
3067                 if (!rec)
3068                         return ERR_PTR(-ENOMEM);
3069                 rec->objectid = objectid;
3070                 INIT_LIST_HEAD(&rec->backrefs);
3071                 rec->cache.start = objectid;
3072                 rec->cache.size = 1;
3073
3074                 ret = insert_cache_extent(root_cache, &rec->cache);
3075                 if (ret)
3076                         return ERR_PTR(-EEXIST);
3077         }
3078         return rec;
3079 }
3080
3081 static struct root_backref *get_root_backref(struct root_record *rec,
3082                                              u64 ref_root, u64 dir, u64 index,
3083                                              const char *name, int namelen)
3084 {
3085         struct root_backref *backref;
3086
3087         list_for_each_entry(backref, &rec->backrefs, list) {
3088                 if (backref->ref_root != ref_root || backref->dir != dir ||
3089                     backref->namelen != namelen)
3090                         continue;
3091                 if (memcmp(name, backref->name, namelen))
3092                         continue;
3093                 return backref;
3094         }
3095
3096         backref = calloc(1, sizeof(*backref) + namelen + 1);
3097         if (!backref)
3098                 return NULL;
3099         backref->ref_root = ref_root;
3100         backref->dir = dir;
3101         backref->index = index;
3102         backref->namelen = namelen;
3103         memcpy(backref->name, name, namelen);
3104         backref->name[namelen] = '\0';
3105         list_add_tail(&backref->list, &rec->backrefs);
3106         return backref;
3107 }
3108
3109 static void free_root_record(struct cache_extent *cache)
3110 {
3111         struct root_record *rec;
3112         struct root_backref *backref;
3113
3114         rec = container_of(cache, struct root_record, cache);
3115         while (!list_empty(&rec->backrefs)) {
3116                 backref = list_entry(rec->backrefs.next,
3117                                      struct root_backref, list);
3118                 list_del(&backref->list);
3119                 free(backref);
3120         }
3121
3122         kfree(rec);
3123 }
3124
3125 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3126
3127 static int add_root_backref(struct cache_tree *root_cache,
3128                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3129                             const char *name, int namelen,
3130                             int item_type, int errors)
3131 {
3132         struct root_record *rec;
3133         struct root_backref *backref;
3134
3135         rec = get_root_rec(root_cache, root_id);
3136         BUG_ON(IS_ERR(rec));
3137         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3138         BUG_ON(!backref);
3139
3140         backref->errors |= errors;
3141
3142         if (item_type != BTRFS_DIR_ITEM_KEY) {
3143                 if (backref->found_dir_index || backref->found_back_ref ||
3144                     backref->found_forward_ref) {
3145                         if (backref->index != index)
3146                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3147                 } else {
3148                         backref->index = index;
3149                 }
3150         }
3151
3152         if (item_type == BTRFS_DIR_ITEM_KEY) {
3153                 if (backref->found_forward_ref)
3154                         rec->found_ref++;
3155                 backref->found_dir_item = 1;
3156         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3157                 backref->found_dir_index = 1;
3158         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3159                 if (backref->found_forward_ref)
3160                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3161                 else if (backref->found_dir_item)
3162                         rec->found_ref++;
3163                 backref->found_forward_ref = 1;
3164         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3165                 if (backref->found_back_ref)
3166                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3167                 backref->found_back_ref = 1;
3168         } else {
3169                 BUG_ON(1);
3170         }
3171
3172         if (backref->found_forward_ref && backref->found_dir_item)
3173                 backref->reachable = 1;
3174         return 0;
3175 }
3176
3177 static int merge_root_recs(struct btrfs_root *root,
3178                            struct cache_tree *src_cache,
3179                            struct cache_tree *dst_cache)
3180 {
3181         struct cache_extent *cache;
3182         struct ptr_node *node;
3183         struct inode_record *rec;
3184         struct inode_backref *backref;
3185         int ret = 0;
3186
3187         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3188                 free_inode_recs_tree(src_cache);
3189                 return 0;
3190         }
3191
3192         while (1) {
3193                 cache = search_cache_extent(src_cache, 0);
3194                 if (!cache)
3195                         break;
3196                 node = container_of(cache, struct ptr_node, cache);
3197                 rec = node->data;
3198                 remove_cache_extent(src_cache, &node->cache);
3199                 free(node);
3200
3201                 ret = is_child_root(root, root->objectid, rec->ino);
3202                 if (ret < 0)
3203                         break;
3204                 else if (ret == 0)
3205                         goto skip;
3206
3207                 list_for_each_entry(backref, &rec->backrefs, list) {
3208                         BUG_ON(backref->found_inode_ref);
3209                         if (backref->found_dir_item)
3210                                 add_root_backref(dst_cache, rec->ino,
3211                                         root->root_key.objectid, backref->dir,
3212                                         backref->index, backref->name,
3213                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3214                                         backref->errors);
3215                         if (backref->found_dir_index)
3216                                 add_root_backref(dst_cache, rec->ino,
3217                                         root->root_key.objectid, backref->dir,
3218                                         backref->index, backref->name,
3219                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3220                                         backref->errors);
3221                 }
3222 skip:
3223                 free_inode_rec(rec);
3224         }
3225         if (ret < 0)
3226                 return ret;
3227         return 0;
3228 }
3229
3230 static int check_root_refs(struct btrfs_root *root,
3231                            struct cache_tree *root_cache)
3232 {
3233         struct root_record *rec;
3234         struct root_record *ref_root;
3235         struct root_backref *backref;
3236         struct cache_extent *cache;
3237         int loop = 1;
3238         int ret;
3239         int error;
3240         int errors = 0;
3241
3242         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3243         BUG_ON(IS_ERR(rec));
3244         rec->found_ref = 1;
3245
3246         /* fixme: this can not detect circular references */
3247         while (loop) {
3248                 loop = 0;
3249                 cache = search_cache_extent(root_cache, 0);
3250                 while (1) {
3251                         if (!cache)
3252                                 break;
3253                         rec = container_of(cache, struct root_record, cache);
3254                         cache = next_cache_extent(cache);
3255
3256                         if (rec->found_ref == 0)
3257                                 continue;
3258
3259                         list_for_each_entry(backref, &rec->backrefs, list) {
3260                                 if (!backref->reachable)
3261                                         continue;
3262
3263                                 ref_root = get_root_rec(root_cache,
3264                                                         backref->ref_root);
3265                                 BUG_ON(IS_ERR(ref_root));
3266                                 if (ref_root->found_ref > 0)
3267                                         continue;
3268
3269                                 backref->reachable = 0;
3270                                 rec->found_ref--;
3271                                 if (rec->found_ref == 0)
3272                                         loop = 1;
3273                         }
3274                 }
3275         }
3276
3277         cache = search_cache_extent(root_cache, 0);
3278         while (1) {
3279                 if (!cache)
3280                         break;
3281                 rec = container_of(cache, struct root_record, cache);
3282                 cache = next_cache_extent(cache);
3283
3284                 if (rec->found_ref == 0 &&
3285                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3286                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3287                         ret = check_orphan_item(root->fs_info->tree_root,
3288                                                 rec->objectid);
3289                         if (ret == 0)
3290                                 continue;
3291
3292                         /*
3293                          * If we don't have a root item then we likely just have
3294                          * a dir item in a snapshot for this root but no actual
3295                          * ref key or anything so it's meaningless.
3296                          */
3297                         if (!rec->found_root_item)
3298                                 continue;
3299                         errors++;
3300                         fprintf(stderr, "fs tree %llu not referenced\n",
3301                                 (unsigned long long)rec->objectid);
3302                 }
3303
3304                 error = 0;
3305                 if (rec->found_ref > 0 && !rec->found_root_item)
3306                         error = 1;
3307                 list_for_each_entry(backref, &rec->backrefs, list) {
3308                         if (!backref->found_dir_item)
3309                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3310                         if (!backref->found_dir_index)
3311                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3312                         if (!backref->found_back_ref)
3313                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3314                         if (!backref->found_forward_ref)
3315                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3316                         if (backref->reachable && backref->errors)
3317                                 error = 1;
3318                 }
3319                 if (!error)
3320                         continue;
3321
3322                 errors++;
3323                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3324                         (unsigned long long)rec->objectid, rec->found_ref,
3325                          rec->found_root_item ? "" : "not found");
3326
3327                 list_for_each_entry(backref, &rec->backrefs, list) {
3328                         if (!backref->reachable)
3329                                 continue;
3330                         if (!backref->errors && rec->found_root_item)
3331                                 continue;
3332                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3333                                 " index %llu namelen %u name %s errors %x\n",
3334                                 (unsigned long long)backref->ref_root,
3335                                 (unsigned long long)backref->dir,
3336                                 (unsigned long long)backref->index,
3337                                 backref->namelen, backref->name,
3338                                 backref->errors);
3339                         print_ref_error(backref->errors);
3340                 }
3341         }
3342         return errors > 0 ? 1 : 0;
3343 }
3344
3345 static int process_root_ref(struct extent_buffer *eb, int slot,
3346                             struct btrfs_key *key,
3347                             struct cache_tree *root_cache)
3348 {
3349         u64 dirid;
3350         u64 index;
3351         u32 len;
3352         u32 name_len;
3353         struct btrfs_root_ref *ref;
3354         char namebuf[BTRFS_NAME_LEN];
3355         int error;
3356
3357         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3358
3359         dirid = btrfs_root_ref_dirid(eb, ref);
3360         index = btrfs_root_ref_sequence(eb, ref);
3361         name_len = btrfs_root_ref_name_len(eb, ref);
3362
3363         if (name_len <= BTRFS_NAME_LEN) {
3364                 len = name_len;
3365                 error = 0;
3366         } else {
3367                 len = BTRFS_NAME_LEN;
3368                 error = REF_ERR_NAME_TOO_LONG;
3369         }
3370         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3371
3372         if (key->type == BTRFS_ROOT_REF_KEY) {
3373                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3374                                  index, namebuf, len, key->type, error);
3375         } else {
3376                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3377                                  index, namebuf, len, key->type, error);
3378         }
3379         return 0;
3380 }
3381
3382 static void free_corrupt_block(struct cache_extent *cache)
3383 {
3384         struct btrfs_corrupt_block *corrupt;
3385
3386         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3387         free(corrupt);
3388 }
3389
3390 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3391
3392 /*
3393  * Repair the btree of the given root.
3394  *
3395  * The fix is to remove the node key in corrupt_blocks cache_tree.
3396  * and rebalance the tree.
3397  * After the fix, the btree should be writeable.
3398  */
3399 static int repair_btree(struct btrfs_root *root,
3400                         struct cache_tree *corrupt_blocks)
3401 {
3402         struct btrfs_trans_handle *trans;
3403         struct btrfs_path *path;
3404         struct btrfs_corrupt_block *corrupt;
3405         struct cache_extent *cache;
3406         struct btrfs_key key;
3407         u64 offset;
3408         int level;
3409         int ret = 0;
3410
3411         if (cache_tree_empty(corrupt_blocks))
3412                 return 0;
3413
3414         path = btrfs_alloc_path();
3415         if (!path)
3416                 return -ENOMEM;
3417
3418         trans = btrfs_start_transaction(root, 1);
3419         if (IS_ERR(trans)) {
3420                 ret = PTR_ERR(trans);
3421                 fprintf(stderr, "Error starting transaction: %s\n",
3422                         strerror(-ret));
3423                 goto out_free_path;
3424         }
3425         cache = first_cache_extent(corrupt_blocks);
3426         while (cache) {
3427                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3428                                        cache);
3429                 level = corrupt->level;
3430                 path->lowest_level = level;
3431                 key.objectid = corrupt->key.objectid;
3432                 key.type = corrupt->key.type;
3433                 key.offset = corrupt->key.offset;
3434
3435                 /*
3436                  * Here we don't want to do any tree balance, since it may
3437                  * cause a balance with corrupted brother leaf/node,
3438                  * so ins_len set to 0 here.
3439                  * Balance will be done after all corrupt node/leaf is deleted.
3440                  */
3441                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3442                 if (ret < 0)
3443                         goto out;
3444                 offset = btrfs_node_blockptr(path->nodes[level],
3445                                              path->slots[level]);
3446
3447                 /* Remove the ptr */
3448                 ret = btrfs_del_ptr(trans, root, path, level,
3449                                     path->slots[level]);
3450                 if (ret < 0)
3451                         goto out;
3452                 /*
3453                  * Remove the corresponding extent
3454                  * return value is not concerned.
3455                  */
3456                 btrfs_release_path(path);
3457                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3458                                         0, root->root_key.objectid,
3459                                         level - 1, 0);
3460                 cache = next_cache_extent(cache);
3461         }
3462
3463         /* Balance the btree using btrfs_search_slot() */
3464         cache = first_cache_extent(corrupt_blocks);
3465         while (cache) {
3466                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3467                                        cache);
3468                 memcpy(&key, &corrupt->key, sizeof(key));
3469                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3470                 if (ret < 0)
3471                         goto out;
3472                 /* return will always >0 since it won't find the item */
3473                 ret = 0;
3474                 btrfs_release_path(path);
3475                 cache = next_cache_extent(cache);
3476         }
3477 out:
3478         btrfs_commit_transaction(trans, root);
3479 out_free_path:
3480         btrfs_free_path(path);
3481         return ret;
3482 }
3483
3484 static int check_fs_root(struct btrfs_root *root,
3485                          struct cache_tree *root_cache,
3486                          struct walk_control *wc)
3487 {
3488         int ret = 0;
3489         int err = 0;
3490         int wret;
3491         int level;
3492         struct btrfs_path path;
3493         struct shared_node root_node;
3494         struct root_record *rec;
3495         struct btrfs_root_item *root_item = &root->root_item;
3496         struct cache_tree corrupt_blocks;
3497         struct orphan_data_extent *orphan;
3498         struct orphan_data_extent *tmp;
3499         enum btrfs_tree_block_status status;
3500
3501         /*
3502          * Reuse the corrupt_block cache tree to record corrupted tree block
3503          *
3504          * Unlike the usage in extent tree check, here we do it in a per
3505          * fs/subvol tree base.
3506          */
3507         cache_tree_init(&corrupt_blocks);
3508         root->fs_info->corrupt_blocks = &corrupt_blocks;
3509
3510         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3511                 rec = get_root_rec(root_cache, root->root_key.objectid);
3512                 BUG_ON(IS_ERR(rec));
3513                 if (btrfs_root_refs(root_item) > 0)
3514                         rec->found_root_item = 1;
3515         }
3516
3517         btrfs_init_path(&path);
3518         memset(&root_node, 0, sizeof(root_node));
3519         cache_tree_init(&root_node.root_cache);
3520         cache_tree_init(&root_node.inode_cache);
3521
3522         /* Move the orphan extent record to corresponding inode_record */
3523         list_for_each_entry_safe(orphan, tmp,
3524                                  &root->orphan_data_extents, list) {
3525                 struct inode_record *inode;
3526
3527                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3528                                       1);
3529                 BUG_ON(IS_ERR(inode));
3530                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3531                 list_move(&orphan->list, &inode->orphan_extents);
3532         }
3533
3534         level = btrfs_header_level(root->node);
3535         memset(wc->nodes, 0, sizeof(wc->nodes));
3536         wc->nodes[level] = &root_node;
3537         wc->active_node = level;
3538         wc->root_level = level;
3539
3540         /* We may not have checked the root block, lets do that now */
3541         if (btrfs_is_leaf(root->node))
3542                 status = btrfs_check_leaf(root, NULL, root->node);
3543         else
3544                 status = btrfs_check_node(root, NULL, root->node);
3545         if (status != BTRFS_TREE_BLOCK_CLEAN)
3546                 return -EIO;
3547
3548         if (btrfs_root_refs(root_item) > 0 ||
3549             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3550                 path.nodes[level] = root->node;
3551                 extent_buffer_get(root->node);
3552                 path.slots[level] = 0;
3553         } else {
3554                 struct btrfs_key key;
3555                 struct btrfs_disk_key found_key;
3556
3557                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3558                 level = root_item->drop_level;
3559                 path.lowest_level = level;
3560                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3561                 if (wret < 0)
3562                         goto skip_walking;
3563                 btrfs_node_key(path.nodes[level], &found_key,
3564                                 path.slots[level]);
3565                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3566                                         sizeof(found_key)));
3567         }
3568
3569         while (1) {
3570                 wret = walk_down_tree(root, &path, wc, &level);
3571                 if (wret < 0)
3572                         ret = wret;
3573                 if (wret != 0)
3574                         break;
3575
3576                 wret = walk_up_tree(root, &path, wc, &level);
3577                 if (wret < 0)
3578                         ret = wret;
3579                 if (wret != 0)
3580                         break;
3581         }
3582 skip_walking:
3583         btrfs_release_path(&path);
3584
3585         if (!cache_tree_empty(&corrupt_blocks)) {
3586                 struct cache_extent *cache;
3587                 struct btrfs_corrupt_block *corrupt;
3588
3589                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3590                        root->root_key.objectid);
3591                 cache = first_cache_extent(&corrupt_blocks);
3592                 while (cache) {
3593                         corrupt = container_of(cache,
3594                                                struct btrfs_corrupt_block,
3595                                                cache);
3596                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3597                                cache->start, corrupt->level,
3598                                corrupt->key.objectid, corrupt->key.type,
3599                                corrupt->key.offset);
3600                         cache = next_cache_extent(cache);
3601                 }
3602                 if (repair) {
3603                         printf("Try to repair the btree for root %llu\n",
3604                                root->root_key.objectid);
3605                         ret = repair_btree(root, &corrupt_blocks);
3606                         if (ret < 0)
3607                                 fprintf(stderr, "Failed to repair btree: %s\n",
3608                                         strerror(-ret));
3609                         if (!ret)
3610                                 printf("Btree for root %llu is fixed\n",
3611                                        root->root_key.objectid);
3612                 }
3613         }
3614
3615         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3616         if (err < 0)
3617                 ret = err;
3618
3619         if (root_node.current) {
3620                 root_node.current->checked = 1;
3621                 maybe_free_inode_rec(&root_node.inode_cache,
3622                                 root_node.current);
3623         }
3624
3625         err = check_inode_recs(root, &root_node.inode_cache);
3626         if (!ret)
3627                 ret = err;
3628
3629         free_corrupt_blocks_tree(&corrupt_blocks);
3630         root->fs_info->corrupt_blocks = NULL;
3631         free_orphan_data_extents(&root->orphan_data_extents);
3632         return ret;
3633 }
3634
3635 static int fs_root_objectid(u64 objectid)
3636 {
3637         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3638             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3639                 return 1;
3640         return is_fstree(objectid);
3641 }
3642
3643 static int check_fs_roots(struct btrfs_root *root,
3644                           struct cache_tree *root_cache)
3645 {
3646         struct btrfs_path path;
3647         struct btrfs_key key;
3648         struct walk_control wc;
3649         struct extent_buffer *leaf, *tree_node;
3650         struct btrfs_root *tmp_root;
3651         struct btrfs_root *tree_root = root->fs_info->tree_root;
3652         int ret;
3653         int err = 0;
3654
3655         if (ctx.progress_enabled) {
3656                 ctx.tp = TASK_FS_ROOTS;
3657                 task_start(ctx.info);
3658         }
3659
3660         /*
3661          * Just in case we made any changes to the extent tree that weren't
3662          * reflected into the free space cache yet.
3663          */
3664         if (repair)
3665                 reset_cached_block_groups(root->fs_info);
3666         memset(&wc, 0, sizeof(wc));
3667         cache_tree_init(&wc.shared);
3668         btrfs_init_path(&path);
3669
3670 again:
3671         key.offset = 0;
3672         key.objectid = 0;
3673         key.type = BTRFS_ROOT_ITEM_KEY;
3674         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3675         if (ret < 0) {
3676                 err = 1;
3677                 goto out;
3678         }
3679         tree_node = tree_root->node;
3680         while (1) {
3681                 if (tree_node != tree_root->node) {
3682                         free_root_recs_tree(root_cache);
3683                         btrfs_release_path(&path);
3684                         goto again;
3685                 }
3686                 leaf = path.nodes[0];
3687                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3688                         ret = btrfs_next_leaf(tree_root, &path);
3689                         if (ret) {
3690                                 if (ret < 0)
3691                                         err = 1;
3692                                 break;
3693                         }
3694                         leaf = path.nodes[0];
3695                 }
3696                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3697                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3698                     fs_root_objectid(key.objectid)) {
3699                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3700                                 tmp_root = btrfs_read_fs_root_no_cache(
3701                                                 root->fs_info, &key);
3702                         } else {
3703                                 key.offset = (u64)-1;
3704                                 tmp_root = btrfs_read_fs_root(
3705                                                 root->fs_info, &key);
3706                         }
3707                         if (IS_ERR(tmp_root)) {
3708                                 err = 1;
3709                                 goto next;
3710                         }
3711                         ret = check_fs_root(tmp_root, root_cache, &wc);
3712                         if (ret == -EAGAIN) {
3713                                 free_root_recs_tree(root_cache);
3714                                 btrfs_release_path(&path);
3715                                 goto again;
3716                         }
3717                         if (ret)
3718                                 err = 1;
3719                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3720                                 btrfs_free_fs_root(tmp_root);
3721                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3722                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3723                         process_root_ref(leaf, path.slots[0], &key,
3724                                          root_cache);
3725                 }
3726 next:
3727                 path.slots[0]++;
3728         }
3729 out:
3730         btrfs_release_path(&path);
3731         if (err)
3732                 free_extent_cache_tree(&wc.shared);
3733         if (!cache_tree_empty(&wc.shared))
3734                 fprintf(stderr, "warning line %d\n", __LINE__);
3735
3736         task_stop(ctx.info);
3737
3738         return err;
3739 }
3740
3741 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3742 {
3743         struct list_head *cur = rec->backrefs.next;
3744         struct extent_backref *back;
3745         struct tree_backref *tback;
3746         struct data_backref *dback;
3747         u64 found = 0;
3748         int err = 0;
3749
3750         while(cur != &rec->backrefs) {
3751                 back = list_entry(cur, struct extent_backref, list);
3752                 cur = cur->next;
3753                 if (!back->found_extent_tree) {
3754                         err = 1;
3755                         if (!print_errs)
3756                                 goto out;
3757                         if (back->is_data) {
3758                                 dback = (struct data_backref *)back;
3759                                 fprintf(stderr, "Backref %llu %s %llu"
3760                                         " owner %llu offset %llu num_refs %lu"
3761                                         " not found in extent tree\n",
3762                                         (unsigned long long)rec->start,
3763                                         back->full_backref ?
3764                                         "parent" : "root",
3765                                         back->full_backref ?
3766                                         (unsigned long long)dback->parent:
3767                                         (unsigned long long)dback->root,
3768                                         (unsigned long long)dback->owner,
3769                                         (unsigned long long)dback->offset,
3770                                         (unsigned long)dback->num_refs);
3771                         } else {
3772                                 tback = (struct tree_backref *)back;
3773                                 fprintf(stderr, "Backref %llu parent %llu"
3774                                         " root %llu not found in extent tree\n",
3775                                         (unsigned long long)rec->start,
3776                                         (unsigned long long)tback->parent,
3777                                         (unsigned long long)tback->root);
3778                         }
3779                 }
3780                 if (!back->is_data && !back->found_ref) {
3781                         err = 1;
3782                         if (!print_errs)
3783                                 goto out;
3784                         tback = (struct tree_backref *)back;
3785                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3786                                 (unsigned long long)rec->start,
3787                                 back->full_backref ? "parent" : "root",
3788                                 back->full_backref ?
3789                                 (unsigned long long)tback->parent :
3790                                 (unsigned long long)tback->root, back);
3791                 }
3792                 if (back->is_data) {
3793                         dback = (struct data_backref *)back;
3794                         if (dback->found_ref != dback->num_refs) {
3795                                 err = 1;
3796                                 if (!print_errs)
3797                                         goto out;
3798                                 fprintf(stderr, "Incorrect local backref count"
3799                                         " on %llu %s %llu owner %llu"
3800                                         " offset %llu found %u wanted %u back %p\n",
3801                                         (unsigned long long)rec->start,
3802                                         back->full_backref ?
3803                                         "parent" : "root",
3804                                         back->full_backref ?
3805                                         (unsigned long long)dback->parent:
3806                                         (unsigned long long)dback->root,
3807                                         (unsigned long long)dback->owner,
3808                                         (unsigned long long)dback->offset,
3809                                         dback->found_ref, dback->num_refs, back);
3810                         }
3811                         if (dback->disk_bytenr != rec->start) {
3812                                 err = 1;
3813                                 if (!print_errs)
3814                                         goto out;
3815                                 fprintf(stderr, "Backref disk bytenr does not"
3816                                         " match extent record, bytenr=%llu, "
3817                                         "ref bytenr=%llu\n",
3818                                         (unsigned long long)rec->start,
3819                                         (unsigned long long)dback->disk_bytenr);
3820                         }
3821
3822                         if (dback->bytes != rec->nr) {
3823                                 err = 1;
3824                                 if (!print_errs)
3825                                         goto out;
3826                                 fprintf(stderr, "Backref bytes do not match "
3827                                         "extent backref, bytenr=%llu, ref "
3828                                         "bytes=%llu, backref bytes=%llu\n",
3829                                         (unsigned long long)rec->start,
3830                                         (unsigned long long)rec->nr,
3831                                         (unsigned long long)dback->bytes);
3832                         }
3833                 }
3834                 if (!back->is_data) {
3835                         found += 1;
3836                 } else {
3837                         dback = (struct data_backref *)back;
3838                         found += dback->found_ref;
3839                 }
3840         }
3841         if (found != rec->refs) {
3842                 err = 1;
3843                 if (!print_errs)
3844                         goto out;
3845                 fprintf(stderr, "Incorrect global backref count "
3846                         "on %llu found %llu wanted %llu\n",
3847                         (unsigned long long)rec->start,
3848                         (unsigned long long)found,
3849                         (unsigned long long)rec->refs);
3850         }
3851 out:
3852         return err;
3853 }
3854
3855 static int free_all_extent_backrefs(struct extent_record *rec)
3856 {
3857         struct extent_backref *back;
3858         struct list_head *cur;
3859         while (!list_empty(&rec->backrefs)) {
3860                 cur = rec->backrefs.next;
3861                 back = list_entry(cur, struct extent_backref, list);
3862                 list_del(cur);
3863                 free(back);
3864         }
3865         return 0;
3866 }
3867
3868 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3869                                      struct cache_tree *extent_cache)
3870 {
3871         struct cache_extent *cache;
3872         struct extent_record *rec;
3873
3874         while (1) {
3875                 cache = first_cache_extent(extent_cache);
3876                 if (!cache)
3877                         break;
3878                 rec = container_of(cache, struct extent_record, cache);
3879                 remove_cache_extent(extent_cache, cache);
3880                 free_all_extent_backrefs(rec);
3881                 free(rec);
3882         }
3883 }
3884
3885 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3886                                  struct extent_record *rec)
3887 {
3888         if (rec->content_checked && rec->owner_ref_checked &&
3889             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3890             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3891             !rec->bad_full_backref && !rec->crossing_stripes &&
3892             !rec->wrong_chunk_type) {
3893                 remove_cache_extent(extent_cache, &rec->cache);
3894                 free_all_extent_backrefs(rec);
3895                 list_del_init(&rec->list);
3896                 free(rec);
3897         }
3898         return 0;
3899 }
3900
3901 static int check_owner_ref(struct btrfs_root *root,
3902                             struct extent_record *rec,
3903                             struct extent_buffer *buf)
3904 {
3905         struct extent_backref *node;
3906         struct tree_backref *back;
3907         struct btrfs_root *ref_root;
3908         struct btrfs_key key;
3909         struct btrfs_path path;
3910         struct extent_buffer *parent;
3911         int level;
3912         int found = 0;
3913         int ret;
3914
3915         list_for_each_entry(node, &rec->backrefs, list) {
3916                 if (node->is_data)
3917                         continue;
3918                 if (!node->found_ref)
3919                         continue;
3920                 if (node->full_backref)
3921                         continue;
3922                 back = (struct tree_backref *)node;
3923                 if (btrfs_header_owner(buf) == back->root)
3924                         return 0;
3925         }
3926         BUG_ON(rec->is_root);
3927
3928         /* try to find the block by search corresponding fs tree */
3929         key.objectid = btrfs_header_owner(buf);
3930         key.type = BTRFS_ROOT_ITEM_KEY;
3931         key.offset = (u64)-1;
3932
3933         ref_root = btrfs_read_fs_root(root->fs_info, &key);
3934         if (IS_ERR(ref_root))
3935                 return 1;
3936
3937         level = btrfs_header_level(buf);
3938         if (level == 0)
3939                 btrfs_item_key_to_cpu(buf, &key, 0);
3940         else
3941                 btrfs_node_key_to_cpu(buf, &key, 0);
3942
3943         btrfs_init_path(&path);
3944         path.lowest_level = level + 1;
3945         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3946         if (ret < 0)
3947                 return 0;
3948
3949         parent = path.nodes[level + 1];
3950         if (parent && buf->start == btrfs_node_blockptr(parent,
3951                                                         path.slots[level + 1]))
3952                 found = 1;
3953
3954         btrfs_release_path(&path);
3955         return found ? 0 : 1;
3956 }
3957
3958 static int is_extent_tree_record(struct extent_record *rec)
3959 {
3960         struct list_head *cur = rec->backrefs.next;
3961         struct extent_backref *node;
3962         struct tree_backref *back;
3963         int is_extent = 0;
3964
3965         while(cur != &rec->backrefs) {
3966                 node = list_entry(cur, struct extent_backref, list);
3967                 cur = cur->next;
3968                 if (node->is_data)
3969                         return 0;
3970                 back = (struct tree_backref *)node;
3971                 if (node->full_backref)
3972                         return 0;
3973                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3974                         is_extent = 1;
3975         }
3976         return is_extent;
3977 }
3978
3979
3980 static int record_bad_block_io(struct btrfs_fs_info *info,
3981                                struct cache_tree *extent_cache,
3982                                u64 start, u64 len)
3983 {
3984         struct extent_record *rec;
3985         struct cache_extent *cache;
3986         struct btrfs_key key;
3987
3988         cache = lookup_cache_extent(extent_cache, start, len);
3989         if (!cache)
3990                 return 0;
3991
3992         rec = container_of(cache, struct extent_record, cache);
3993         if (!is_extent_tree_record(rec))
3994                 return 0;
3995
3996         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3997         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
3998 }
3999
4000 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4001                        struct extent_buffer *buf, int slot)
4002 {
4003         if (btrfs_header_level(buf)) {
4004                 struct btrfs_key_ptr ptr1, ptr2;
4005
4006                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4007                                    sizeof(struct btrfs_key_ptr));
4008                 read_extent_buffer(buf, &ptr2,
4009                                    btrfs_node_key_ptr_offset(slot + 1),
4010                                    sizeof(struct btrfs_key_ptr));
4011                 write_extent_buffer(buf, &ptr1,
4012                                     btrfs_node_key_ptr_offset(slot + 1),
4013                                     sizeof(struct btrfs_key_ptr));
4014                 write_extent_buffer(buf, &ptr2,
4015                                     btrfs_node_key_ptr_offset(slot),
4016                                     sizeof(struct btrfs_key_ptr));
4017                 if (slot == 0) {
4018                         struct btrfs_disk_key key;
4019                         btrfs_node_key(buf, &key, 0);
4020                         btrfs_fixup_low_keys(root, path, &key,
4021                                              btrfs_header_level(buf) + 1);
4022                 }
4023         } else {
4024                 struct btrfs_item *item1, *item2;
4025                 struct btrfs_key k1, k2;
4026                 char *item1_data, *item2_data;
4027                 u32 item1_offset, item2_offset, item1_size, item2_size;
4028
4029                 item1 = btrfs_item_nr(slot);
4030                 item2 = btrfs_item_nr(slot + 1);
4031                 btrfs_item_key_to_cpu(buf, &k1, slot);
4032                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4033                 item1_offset = btrfs_item_offset(buf, item1);
4034                 item2_offset = btrfs_item_offset(buf, item2);
4035                 item1_size = btrfs_item_size(buf, item1);
4036                 item2_size = btrfs_item_size(buf, item2);
4037
4038                 item1_data = malloc(item1_size);
4039                 if (!item1_data)
4040                         return -ENOMEM;
4041                 item2_data = malloc(item2_size);
4042                 if (!item2_data) {
4043                         free(item1_data);
4044                         return -ENOMEM;
4045                 }
4046
4047                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4048                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4049
4050                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4051                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4052                 free(item1_data);
4053                 free(item2_data);
4054
4055                 btrfs_set_item_offset(buf, item1, item2_offset);
4056                 btrfs_set_item_offset(buf, item2, item1_offset);
4057                 btrfs_set_item_size(buf, item1, item2_size);
4058                 btrfs_set_item_size(buf, item2, item1_size);
4059
4060                 path->slots[0] = slot;
4061                 btrfs_set_item_key_unsafe(root, path, &k2);
4062                 path->slots[0] = slot + 1;
4063                 btrfs_set_item_key_unsafe(root, path, &k1);
4064         }
4065         return 0;
4066 }
4067
4068 static int fix_key_order(struct btrfs_trans_handle *trans,
4069                          struct btrfs_root *root,
4070                          struct btrfs_path *path)
4071 {
4072         struct extent_buffer *buf;
4073         struct btrfs_key k1, k2;
4074         int i;
4075         int level = path->lowest_level;
4076         int ret = -EIO;
4077
4078         buf = path->nodes[level];
4079         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4080                 if (level) {
4081                         btrfs_node_key_to_cpu(buf, &k1, i);
4082                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4083                 } else {
4084                         btrfs_item_key_to_cpu(buf, &k1, i);
4085                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4086                 }
4087                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4088                         continue;
4089                 ret = swap_values(root, path, buf, i);
4090                 if (ret)
4091                         break;
4092                 btrfs_mark_buffer_dirty(buf);
4093                 i = 0;
4094         }
4095         return ret;
4096 }
4097
4098 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4099                              struct btrfs_root *root,
4100                              struct btrfs_path *path,
4101                              struct extent_buffer *buf, int slot)
4102 {
4103         struct btrfs_key key;
4104         int nritems = btrfs_header_nritems(buf);
4105
4106         btrfs_item_key_to_cpu(buf, &key, slot);
4107
4108         /* These are all the keys we can deal with missing. */
4109         if (key.type != BTRFS_DIR_INDEX_KEY &&
4110             key.type != BTRFS_EXTENT_ITEM_KEY &&
4111             key.type != BTRFS_METADATA_ITEM_KEY &&
4112             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4113             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4114                 return -1;
4115
4116         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4117                (unsigned long long)key.objectid, key.type,
4118                (unsigned long long)key.offset, slot, buf->start);
4119         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4120                               btrfs_item_nr_offset(slot + 1),
4121                               sizeof(struct btrfs_item) *
4122                               (nritems - slot - 1));
4123         btrfs_set_header_nritems(buf, nritems - 1);
4124         if (slot == 0) {
4125                 struct btrfs_disk_key disk_key;
4126
4127                 btrfs_item_key(buf, &disk_key, 0);
4128                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4129         }
4130         btrfs_mark_buffer_dirty(buf);
4131         return 0;
4132 }
4133
4134 static int fix_item_offset(struct btrfs_trans_handle *trans,
4135                            struct btrfs_root *root,
4136                            struct btrfs_path *path)
4137 {
4138         struct extent_buffer *buf;
4139         int i;
4140         int ret = 0;
4141
4142         /* We should only get this for leaves */
4143         BUG_ON(path->lowest_level);
4144         buf = path->nodes[0];
4145 again:
4146         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4147                 unsigned int shift = 0, offset;
4148
4149                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4150                     BTRFS_LEAF_DATA_SIZE(root)) {
4151                         if (btrfs_item_end_nr(buf, i) >
4152                             BTRFS_LEAF_DATA_SIZE(root)) {
4153                                 ret = delete_bogus_item(trans, root, path,
4154                                                         buf, i);
4155                                 if (!ret)
4156                                         goto again;
4157                                 fprintf(stderr, "item is off the end of the "
4158                                         "leaf, can't fix\n");
4159                                 ret = -EIO;
4160                                 break;
4161                         }
4162                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4163                                 btrfs_item_end_nr(buf, i);
4164                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4165                            btrfs_item_offset_nr(buf, i - 1)) {
4166                         if (btrfs_item_end_nr(buf, i) >
4167                             btrfs_item_offset_nr(buf, i - 1)) {
4168                                 ret = delete_bogus_item(trans, root, path,
4169                                                         buf, i);
4170                                 if (!ret)
4171                                         goto again;
4172                                 fprintf(stderr, "items overlap, can't fix\n");
4173                                 ret = -EIO;
4174                                 break;
4175                         }
4176                         shift = btrfs_item_offset_nr(buf, i - 1) -
4177                                 btrfs_item_end_nr(buf, i);
4178                 }
4179                 if (!shift)
4180                         continue;
4181
4182                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4183                        i, shift, (unsigned long long)buf->start);
4184                 offset = btrfs_item_offset_nr(buf, i);
4185                 memmove_extent_buffer(buf,
4186                                       btrfs_leaf_data(buf) + offset + shift,
4187                                       btrfs_leaf_data(buf) + offset,
4188                                       btrfs_item_size_nr(buf, i));
4189                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4190                                       offset + shift);
4191                 btrfs_mark_buffer_dirty(buf);
4192         }
4193
4194         /*
4195          * We may have moved things, in which case we want to exit so we don't
4196          * write those changes out.  Once we have proper abort functionality in
4197          * progs this can be changed to something nicer.
4198          */
4199         BUG_ON(ret);
4200         return ret;
4201 }
4202
4203 /*
4204  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4205  * then just return -EIO.
4206  */
4207 static int try_to_fix_bad_block(struct btrfs_root *root,
4208                                 struct extent_buffer *buf,
4209                                 enum btrfs_tree_block_status status)
4210 {
4211         struct btrfs_trans_handle *trans;
4212         struct ulist *roots;
4213         struct ulist_node *node;
4214         struct btrfs_root *search_root;
4215         struct btrfs_path *path;
4216         struct ulist_iterator iter;
4217         struct btrfs_key root_key, key;
4218         int ret;
4219
4220         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4221             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4222                 return -EIO;
4223
4224         path = btrfs_alloc_path();
4225         if (!path)
4226                 return -EIO;
4227
4228         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4229                                    0, &roots);
4230         if (ret) {
4231                 btrfs_free_path(path);
4232                 return -EIO;
4233         }
4234
4235         ULIST_ITER_INIT(&iter);
4236         while ((node = ulist_next(roots, &iter))) {
4237                 root_key.objectid = node->val;
4238                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4239                 root_key.offset = (u64)-1;
4240
4241                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4242                 if (IS_ERR(root)) {
4243                         ret = -EIO;
4244                         break;
4245                 }
4246
4247
4248                 trans = btrfs_start_transaction(search_root, 0);
4249                 if (IS_ERR(trans)) {
4250                         ret = PTR_ERR(trans);
4251                         break;
4252                 }
4253
4254                 path->lowest_level = btrfs_header_level(buf);
4255                 path->skip_check_block = 1;
4256                 if (path->lowest_level)
4257                         btrfs_node_key_to_cpu(buf, &key, 0);
4258                 else
4259                         btrfs_item_key_to_cpu(buf, &key, 0);
4260                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4261                 if (ret) {
4262                         ret = -EIO;
4263                         btrfs_commit_transaction(trans, search_root);
4264                         break;
4265                 }
4266                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4267                         ret = fix_key_order(trans, search_root, path);
4268                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4269                         ret = fix_item_offset(trans, search_root, path);
4270                 if (ret) {
4271                         btrfs_commit_transaction(trans, search_root);
4272                         break;
4273                 }
4274                 btrfs_release_path(path);
4275                 btrfs_commit_transaction(trans, search_root);
4276         }
4277         ulist_free(roots);
4278         btrfs_free_path(path);
4279         return ret;
4280 }
4281
4282 static int check_block(struct btrfs_root *root,
4283                        struct cache_tree *extent_cache,
4284                        struct extent_buffer *buf, u64 flags)
4285 {
4286         struct extent_record *rec;
4287         struct cache_extent *cache;
4288         struct btrfs_key key;
4289         enum btrfs_tree_block_status status;
4290         int ret = 0;
4291         int level;
4292
4293         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4294         if (!cache)
4295                 return 1;
4296         rec = container_of(cache, struct extent_record, cache);
4297         rec->generation = btrfs_header_generation(buf);
4298
4299         level = btrfs_header_level(buf);
4300         if (btrfs_header_nritems(buf) > 0) {
4301
4302                 if (level == 0)
4303                         btrfs_item_key_to_cpu(buf, &key, 0);
4304                 else
4305                         btrfs_node_key_to_cpu(buf, &key, 0);
4306
4307                 rec->info_objectid = key.objectid;
4308         }
4309         rec->info_level = level;
4310
4311         if (btrfs_is_leaf(buf))
4312                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4313         else
4314                 status = btrfs_check_node(root, &rec->parent_key, buf);
4315
4316         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4317                 if (repair)
4318                         status = try_to_fix_bad_block(root, buf, status);
4319                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4320                         ret = -EIO;
4321                         fprintf(stderr, "bad block %llu\n",
4322                                 (unsigned long long)buf->start);
4323                 } else {
4324                         /*
4325                          * Signal to callers we need to start the scan over
4326                          * again since we'll have cow'ed blocks.
4327                          */
4328                         ret = -EAGAIN;
4329                 }
4330         } else {
4331                 rec->content_checked = 1;
4332                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4333                         rec->owner_ref_checked = 1;
4334                 else {
4335                         ret = check_owner_ref(root, rec, buf);
4336                         if (!ret)
4337                                 rec->owner_ref_checked = 1;
4338                 }
4339         }
4340         if (!ret)
4341                 maybe_free_extent_rec(extent_cache, rec);
4342         return ret;
4343 }
4344
4345 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4346                                                 u64 parent, u64 root)
4347 {
4348         struct list_head *cur = rec->backrefs.next;
4349         struct extent_backref *node;
4350         struct tree_backref *back;
4351
4352         while(cur != &rec->backrefs) {
4353                 node = list_entry(cur, struct extent_backref, list);
4354                 cur = cur->next;
4355                 if (node->is_data)
4356                         continue;
4357                 back = (struct tree_backref *)node;
4358                 if (parent > 0) {
4359                         if (!node->full_backref)
4360                                 continue;
4361                         if (parent == back->parent)
4362                                 return back;
4363                 } else {
4364                         if (node->full_backref)
4365                                 continue;
4366                         if (back->root == root)
4367                                 return back;
4368                 }
4369         }
4370         return NULL;
4371 }
4372
4373 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4374                                                 u64 parent, u64 root)
4375 {
4376         struct tree_backref *ref = malloc(sizeof(*ref));
4377
4378         if (!ref)
4379                 return NULL;
4380         memset(&ref->node, 0, sizeof(ref->node));
4381         if (parent > 0) {
4382                 ref->parent = parent;
4383                 ref->node.full_backref = 1;
4384         } else {
4385                 ref->root = root;
4386                 ref->node.full_backref = 0;
4387         }
4388         list_add_tail(&ref->node.list, &rec->backrefs);
4389
4390         return ref;
4391 }
4392
4393 static struct data_backref *find_data_backref(struct extent_record *rec,
4394                                                 u64 parent, u64 root,
4395                                                 u64 owner, u64 offset,
4396                                                 int found_ref,
4397                                                 u64 disk_bytenr, u64 bytes)
4398 {
4399         struct list_head *cur = rec->backrefs.next;
4400         struct extent_backref *node;
4401         struct data_backref *back;
4402
4403         while(cur != &rec->backrefs) {
4404                 node = list_entry(cur, struct extent_backref, list);
4405                 cur = cur->next;
4406                 if (!node->is_data)
4407                         continue;
4408                 back = (struct data_backref *)node;
4409                 if (parent > 0) {
4410                         if (!node->full_backref)
4411                                 continue;
4412                         if (parent == back->parent)
4413                                 return back;
4414                 } else {
4415                         if (node->full_backref)
4416                                 continue;
4417                         if (back->root == root && back->owner == owner &&
4418                             back->offset == offset) {
4419                                 if (found_ref && node->found_ref &&
4420                                     (back->bytes != bytes ||
4421                                     back->disk_bytenr != disk_bytenr))
4422                                         continue;
4423                                 return back;
4424                         }
4425                 }
4426         }
4427         return NULL;
4428 }
4429
4430 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4431                                                 u64 parent, u64 root,
4432                                                 u64 owner, u64 offset,
4433                                                 u64 max_size)
4434 {
4435         struct data_backref *ref = malloc(sizeof(*ref));
4436
4437         if (!ref)
4438                 return NULL;
4439         memset(&ref->node, 0, sizeof(ref->node));
4440         ref->node.is_data = 1;
4441
4442         if (parent > 0) {
4443                 ref->parent = parent;
4444                 ref->owner = 0;
4445                 ref->offset = 0;
4446                 ref->node.full_backref = 1;
4447         } else {
4448                 ref->root = root;
4449                 ref->owner = owner;
4450                 ref->offset = offset;
4451                 ref->node.full_backref = 0;
4452         }
4453         ref->bytes = max_size;
4454         ref->found_ref = 0;
4455         ref->num_refs = 0;
4456         list_add_tail(&ref->node.list, &rec->backrefs);
4457         if (max_size > rec->max_size)
4458                 rec->max_size = max_size;
4459         return ref;
4460 }
4461
4462 /* Check if the type of extent matches with its chunk */
4463 static void check_extent_type(struct extent_record *rec)
4464 {
4465         struct btrfs_block_group_cache *bg_cache;
4466
4467         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4468         if (!bg_cache)
4469                 return;
4470
4471         /* data extent, check chunk directly*/
4472         if (!rec->metadata) {
4473                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4474                         rec->wrong_chunk_type = 1;
4475                 return;
4476         }
4477
4478         /* metadata extent, check the obvious case first */
4479         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4480                                  BTRFS_BLOCK_GROUP_METADATA))) {
4481                 rec->wrong_chunk_type = 1;
4482                 return;
4483         }
4484
4485         /*
4486          * Check SYSTEM extent, as it's also marked as metadata, we can only
4487          * make sure it's a SYSTEM extent by its backref
4488          */
4489         if (!list_empty(&rec->backrefs)) {
4490                 struct extent_backref *node;
4491                 struct tree_backref *tback;
4492                 u64 bg_type;
4493
4494                 node = list_entry(rec->backrefs.next, struct extent_backref,
4495                                   list);
4496                 if (node->is_data) {
4497                         /* tree block shouldn't have data backref */
4498                         rec->wrong_chunk_type = 1;
4499                         return;
4500                 }
4501                 tback = container_of(node, struct tree_backref, node);
4502
4503                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4504                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4505                 else
4506                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4507                 if (!(bg_cache->flags & bg_type))
4508                         rec->wrong_chunk_type = 1;
4509         }
4510 }
4511
4512 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4513                           struct btrfs_key *parent_key, u64 parent_gen,
4514                           u64 start, u64 nr, u64 extent_item_refs,
4515                           int is_root, int inc_ref, int set_checked,
4516                           int metadata, int extent_rec, u64 max_size)
4517 {
4518         struct extent_record *rec;
4519         int ret = 0;
4520
4521         rec = malloc(sizeof(*rec));
4522         if (!rec)
4523                 return -ENOMEM;
4524         rec->start = start;
4525         rec->max_size = max_size;
4526         rec->nr = max(nr, max_size);
4527         rec->found_rec = !!extent_rec;
4528         rec->content_checked = 0;
4529         rec->owner_ref_checked = 0;
4530         rec->num_duplicates = 0;
4531         rec->metadata = metadata;
4532         rec->flag_block_full_backref = -1;
4533         rec->bad_full_backref = 0;
4534         rec->crossing_stripes = 0;
4535         rec->wrong_chunk_type = 0;
4536         INIT_LIST_HEAD(&rec->backrefs);
4537         INIT_LIST_HEAD(&rec->dups);
4538         INIT_LIST_HEAD(&rec->list);
4539
4540         if (is_root)
4541                 rec->is_root = 1;
4542         else
4543                 rec->is_root = 0;
4544
4545         if (inc_ref)
4546                 rec->refs = 1;
4547         else
4548                 rec->refs = 0;
4549
4550         if (extent_item_refs)
4551                 rec->extent_item_refs = extent_item_refs;
4552         else
4553                 rec->extent_item_refs = 0;
4554
4555         if (parent_key)
4556                 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4557         else
4558                 memset(&rec->parent_key, 0, sizeof(*parent_key));
4559
4560         if (parent_gen)
4561                 rec->parent_generation = parent_gen;
4562         else
4563                 rec->parent_generation = 0;
4564
4565         rec->cache.start = start;
4566         rec->cache.size = nr;
4567         ret = insert_cache_extent(extent_cache, &rec->cache);
4568         BUG_ON(ret);
4569         bytes_used += nr;
4570         if (set_checked) {
4571                 rec->content_checked = 1;
4572                 rec->owner_ref_checked = 1;
4573         }
4574
4575         if (metadata)
4576                 rec->crossing_stripes = check_crossing_stripes(rec->start,
4577                                 rec->max_size);
4578         check_extent_type(rec);
4579         return ret;
4580 }
4581
4582 static int add_extent_rec(struct cache_tree *extent_cache,
4583                           struct btrfs_key *parent_key, u64 parent_gen,
4584                           u64 start, u64 nr, u64 extent_item_refs,
4585                           int is_root, int inc_ref, int set_checked,
4586                           int metadata, int extent_rec, u64 max_size)
4587 {
4588         struct extent_record *rec;
4589         struct cache_extent *cache;
4590         int ret = 0;
4591         int dup = 0;
4592
4593         cache = lookup_cache_extent(extent_cache, start, nr);
4594         if (cache) {
4595                 rec = container_of(cache, struct extent_record, cache);
4596                 if (inc_ref)
4597                         rec->refs++;
4598                 if (rec->nr == 1)
4599                         rec->nr = max(nr, max_size);
4600
4601                 /*
4602                  * We need to make sure to reset nr to whatever the extent
4603                  * record says was the real size, this way we can compare it to
4604                  * the backrefs.
4605                  */
4606                 if (extent_rec) {
4607                         if (start != rec->start || rec->found_rec) {
4608                                 struct extent_record *tmp;
4609
4610                                 dup = 1;
4611                                 if (list_empty(&rec->list))
4612                                         list_add_tail(&rec->list,
4613                                                       &duplicate_extents);
4614
4615                                 /*
4616                                  * We have to do this song and dance in case we
4617                                  * find an extent record that falls inside of
4618                                  * our current extent record but does not have
4619                                  * the same objectid.
4620                                  */
4621                                 tmp = malloc(sizeof(*tmp));
4622                                 if (!tmp)
4623                                         return -ENOMEM;
4624                                 tmp->start = start;
4625                                 tmp->max_size = max_size;
4626                                 tmp->nr = nr;
4627                                 tmp->found_rec = 1;
4628                                 tmp->metadata = metadata;
4629                                 tmp->extent_item_refs = extent_item_refs;
4630                                 INIT_LIST_HEAD(&tmp->list);
4631                                 list_add_tail(&tmp->list, &rec->dups);
4632                                 rec->num_duplicates++;
4633                         } else {
4634                                 rec->nr = nr;
4635                                 rec->found_rec = 1;
4636                         }
4637                 }
4638
4639                 if (extent_item_refs && !dup) {
4640                         if (rec->extent_item_refs) {
4641                                 fprintf(stderr, "block %llu rec "
4642                                         "extent_item_refs %llu, passed %llu\n",
4643                                         (unsigned long long)start,
4644                                         (unsigned long long)
4645                                                         rec->extent_item_refs,
4646                                         (unsigned long long)extent_item_refs);
4647                         }
4648                         rec->extent_item_refs = extent_item_refs;
4649                 }
4650                 if (is_root)
4651                         rec->is_root = 1;
4652                 if (set_checked) {
4653                         rec->content_checked = 1;
4654                         rec->owner_ref_checked = 1;
4655                 }
4656
4657                 if (parent_key)
4658                         btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4659                 if (parent_gen)
4660                         rec->parent_generation = parent_gen;
4661
4662                 if (rec->max_size < max_size)
4663                         rec->max_size = max_size;
4664
4665                 /*
4666                  * A metadata extent can't cross stripe_len boundary, otherwise
4667                  * kernel scrub won't be able to handle it.
4668                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4669                  * it.
4670                  */
4671                 if (metadata)
4672                         rec->crossing_stripes = check_crossing_stripes(
4673                                         rec->start, rec->max_size);
4674                 check_extent_type(rec);
4675                 maybe_free_extent_rec(extent_cache, rec);
4676                 return ret;
4677         }
4678
4679         ret = add_extent_rec_nolookup(extent_cache, parent_key, parent_gen,
4680                         start, nr, extent_item_refs, is_root, inc_ref,
4681                         set_checked, metadata, extent_rec, max_size);
4682
4683         return ret;
4684 }
4685
4686 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4687                             u64 parent, u64 root, int found_ref)
4688 {
4689         struct extent_record *rec;
4690         struct tree_backref *back;
4691         struct cache_extent *cache;
4692
4693         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4694         if (!cache) {
4695                 add_extent_rec_nolookup(extent_cache, NULL, 0, bytenr,
4696                                1, 0, 0, 0, 0, 1, 0, 0);
4697                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4698                 if (!cache)
4699                         abort();
4700         }
4701
4702         rec = container_of(cache, struct extent_record, cache);
4703         if (rec->start != bytenr) {
4704                 abort();
4705         }
4706
4707         back = find_tree_backref(rec, parent, root);
4708         if (!back) {
4709                 back = alloc_tree_backref(rec, parent, root);
4710                 BUG_ON(!back);
4711         }
4712
4713         if (found_ref) {
4714                 if (back->node.found_ref) {
4715                         fprintf(stderr, "Extent back ref already exists "
4716                                 "for %llu parent %llu root %llu \n",
4717                                 (unsigned long long)bytenr,
4718                                 (unsigned long long)parent,
4719                                 (unsigned long long)root);
4720                 }
4721                 back->node.found_ref = 1;
4722         } else {
4723                 if (back->node.found_extent_tree) {
4724                         fprintf(stderr, "Extent back ref already exists "
4725                                 "for %llu parent %llu root %llu \n",
4726                                 (unsigned long long)bytenr,
4727                                 (unsigned long long)parent,
4728                                 (unsigned long long)root);
4729                 }
4730                 back->node.found_extent_tree = 1;
4731         }
4732         check_extent_type(rec);
4733         maybe_free_extent_rec(extent_cache, rec);
4734         return 0;
4735 }
4736
4737 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4738                             u64 parent, u64 root, u64 owner, u64 offset,
4739                             u32 num_refs, int found_ref, u64 max_size)
4740 {
4741         struct extent_record *rec;
4742         struct data_backref *back;
4743         struct cache_extent *cache;
4744
4745         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4746         if (!cache) {
4747                 add_extent_rec_nolookup(extent_cache, NULL, 0, bytenr, 1, 0, 0,
4748                                 0, 0, 0, 0, max_size);
4749                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4750                 if (!cache)
4751                         abort();
4752         }
4753
4754         rec = container_of(cache, struct extent_record, cache);
4755         if (rec->max_size < max_size)
4756                 rec->max_size = max_size;
4757
4758         /*
4759          * If found_ref is set then max_size is the real size and must match the
4760          * existing refs.  So if we have already found a ref then we need to
4761          * make sure that this ref matches the existing one, otherwise we need
4762          * to add a new backref so we can notice that the backrefs don't match
4763          * and we need to figure out who is telling the truth.  This is to
4764          * account for that awful fsync bug I introduced where we'd end up with
4765          * a btrfs_file_extent_item that would have its length include multiple
4766          * prealloc extents or point inside of a prealloc extent.
4767          */
4768         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4769                                  bytenr, max_size);
4770         if (!back) {
4771                 back = alloc_data_backref(rec, parent, root, owner, offset,
4772                                           max_size);
4773                 BUG_ON(!back);
4774         }
4775
4776         if (found_ref) {
4777                 BUG_ON(num_refs != 1);
4778                 if (back->node.found_ref)
4779                         BUG_ON(back->bytes != max_size);
4780                 back->node.found_ref = 1;
4781                 back->found_ref += 1;
4782                 back->bytes = max_size;
4783                 back->disk_bytenr = bytenr;
4784                 rec->refs += 1;
4785                 rec->content_checked = 1;
4786                 rec->owner_ref_checked = 1;
4787         } else {
4788                 if (back->node.found_extent_tree) {
4789                         fprintf(stderr, "Extent back ref already exists "
4790                                 "for %llu parent %llu root %llu "
4791                                 "owner %llu offset %llu num_refs %lu\n",
4792                                 (unsigned long long)bytenr,
4793                                 (unsigned long long)parent,
4794                                 (unsigned long long)root,
4795                                 (unsigned long long)owner,
4796                                 (unsigned long long)offset,
4797                                 (unsigned long)num_refs);
4798                 }
4799                 back->num_refs = num_refs;
4800                 back->node.found_extent_tree = 1;
4801         }
4802         maybe_free_extent_rec(extent_cache, rec);
4803         return 0;
4804 }
4805
4806 static int add_pending(struct cache_tree *pending,
4807                        struct cache_tree *seen, u64 bytenr, u32 size)
4808 {
4809         int ret;
4810         ret = add_cache_extent(seen, bytenr, size);
4811         if (ret)
4812                 return ret;
4813         add_cache_extent(pending, bytenr, size);
4814         return 0;
4815 }
4816
4817 static int pick_next_pending(struct cache_tree *pending,
4818                         struct cache_tree *reada,
4819                         struct cache_tree *nodes,
4820                         u64 last, struct block_info *bits, int bits_nr,
4821                         int *reada_bits)
4822 {
4823         unsigned long node_start = last;
4824         struct cache_extent *cache;
4825         int ret;
4826
4827         cache = search_cache_extent(reada, 0);
4828         if (cache) {
4829                 bits[0].start = cache->start;
4830                 bits[0].size = cache->size;
4831                 *reada_bits = 1;
4832                 return 1;
4833         }
4834         *reada_bits = 0;
4835         if (node_start > 32768)
4836                 node_start -= 32768;
4837
4838         cache = search_cache_extent(nodes, node_start);
4839         if (!cache)
4840                 cache = search_cache_extent(nodes, 0);
4841
4842         if (!cache) {
4843                  cache = search_cache_extent(pending, 0);
4844                  if (!cache)
4845                          return 0;
4846                  ret = 0;
4847                  do {
4848                          bits[ret].start = cache->start;
4849                          bits[ret].size = cache->size;
4850                          cache = next_cache_extent(cache);
4851                          ret++;
4852                  } while (cache && ret < bits_nr);
4853                  return ret;
4854         }
4855
4856         ret = 0;
4857         do {
4858                 bits[ret].start = cache->start;
4859                 bits[ret].size = cache->size;
4860                 cache = next_cache_extent(cache);
4861                 ret++;
4862         } while (cache && ret < bits_nr);
4863
4864         if (bits_nr - ret > 8) {
4865                 u64 lookup = bits[0].start + bits[0].size;
4866                 struct cache_extent *next;
4867                 next = search_cache_extent(pending, lookup);
4868                 while(next) {
4869                         if (next->start - lookup > 32768)
4870                                 break;
4871                         bits[ret].start = next->start;
4872                         bits[ret].size = next->size;
4873                         lookup = next->start + next->size;
4874                         ret++;
4875                         if (ret == bits_nr)
4876                                 break;
4877                         next = next_cache_extent(next);
4878                         if (!next)
4879                                 break;
4880                 }
4881         }
4882         return ret;
4883 }
4884
4885 static void free_chunk_record(struct cache_extent *cache)
4886 {
4887         struct chunk_record *rec;
4888
4889         rec = container_of(cache, struct chunk_record, cache);
4890         list_del_init(&rec->list);
4891         list_del_init(&rec->dextents);
4892         free(rec);
4893 }
4894
4895 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4896 {
4897         cache_tree_free_extents(chunk_cache, free_chunk_record);
4898 }
4899
4900 static void free_device_record(struct rb_node *node)
4901 {
4902         struct device_record *rec;
4903
4904         rec = container_of(node, struct device_record, node);
4905         free(rec);
4906 }
4907
4908 FREE_RB_BASED_TREE(device_cache, free_device_record);
4909
4910 int insert_block_group_record(struct block_group_tree *tree,
4911                               struct block_group_record *bg_rec)
4912 {
4913         int ret;
4914
4915         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4916         if (ret)
4917                 return ret;
4918
4919         list_add_tail(&bg_rec->list, &tree->block_groups);
4920         return 0;
4921 }
4922
4923 static void free_block_group_record(struct cache_extent *cache)
4924 {
4925         struct block_group_record *rec;
4926
4927         rec = container_of(cache, struct block_group_record, cache);
4928         list_del_init(&rec->list);
4929         free(rec);
4930 }
4931
4932 void free_block_group_tree(struct block_group_tree *tree)
4933 {
4934         cache_tree_free_extents(&tree->tree, free_block_group_record);
4935 }
4936
4937 int insert_device_extent_record(struct device_extent_tree *tree,
4938                                 struct device_extent_record *de_rec)
4939 {
4940         int ret;
4941
4942         /*
4943          * Device extent is a bit different from the other extents, because
4944          * the extents which belong to the different devices may have the
4945          * same start and size, so we need use the special extent cache
4946          * search/insert functions.
4947          */
4948         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4949         if (ret)
4950                 return ret;
4951
4952         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4953         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4954         return 0;
4955 }
4956
4957 static void free_device_extent_record(struct cache_extent *cache)
4958 {
4959         struct device_extent_record *rec;
4960
4961         rec = container_of(cache, struct device_extent_record, cache);
4962         if (!list_empty(&rec->chunk_list))
4963                 list_del_init(&rec->chunk_list);
4964         if (!list_empty(&rec->device_list))
4965                 list_del_init(&rec->device_list);
4966         free(rec);
4967 }
4968
4969 void free_device_extent_tree(struct device_extent_tree *tree)
4970 {
4971         cache_tree_free_extents(&tree->tree, free_device_extent_record);
4972 }
4973
4974 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4975 static int process_extent_ref_v0(struct cache_tree *extent_cache,
4976                                  struct extent_buffer *leaf, int slot)
4977 {
4978         struct btrfs_extent_ref_v0 *ref0;
4979         struct btrfs_key key;
4980
4981         btrfs_item_key_to_cpu(leaf, &key, slot);
4982         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
4983         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
4984                 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
4985         } else {
4986                 add_data_backref(extent_cache, key.objectid, key.offset, 0,
4987                                  0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
4988         }
4989         return 0;
4990 }
4991 #endif
4992
4993 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4994                                             struct btrfs_key *key,
4995                                             int slot)
4996 {
4997         struct btrfs_chunk *ptr;
4998         struct chunk_record *rec;
4999         int num_stripes, i;
5000
5001         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5002         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5003
5004         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5005         if (!rec) {
5006                 fprintf(stderr, "memory allocation failed\n");
5007                 exit(-1);
5008         }
5009
5010         INIT_LIST_HEAD(&rec->list);
5011         INIT_LIST_HEAD(&rec->dextents);
5012         rec->bg_rec = NULL;
5013
5014         rec->cache.start = key->offset;
5015         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5016
5017         rec->generation = btrfs_header_generation(leaf);
5018
5019         rec->objectid = key->objectid;
5020         rec->type = key->type;
5021         rec->offset = key->offset;
5022
5023         rec->length = rec->cache.size;
5024         rec->owner = btrfs_chunk_owner(leaf, ptr);
5025         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5026         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5027         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5028         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5029         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5030         rec->num_stripes = num_stripes;
5031         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5032
5033         for (i = 0; i < rec->num_stripes; ++i) {
5034                 rec->stripes[i].devid =
5035                         btrfs_stripe_devid_nr(leaf, ptr, i);
5036                 rec->stripes[i].offset =
5037                         btrfs_stripe_offset_nr(leaf, ptr, i);
5038                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5039                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5040                                 BTRFS_UUID_SIZE);
5041         }
5042
5043         return rec;
5044 }
5045
5046 static int process_chunk_item(struct cache_tree *chunk_cache,
5047                               struct btrfs_key *key, struct extent_buffer *eb,
5048                               int slot)
5049 {
5050         struct chunk_record *rec;
5051         int ret = 0;
5052
5053         rec = btrfs_new_chunk_record(eb, key, slot);
5054         ret = insert_cache_extent(chunk_cache, &rec->cache);
5055         if (ret) {
5056                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5057                         rec->offset, rec->length);
5058                 free(rec);
5059         }
5060
5061         return ret;
5062 }
5063
5064 static int process_device_item(struct rb_root *dev_cache,
5065                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5066 {
5067         struct btrfs_dev_item *ptr;
5068         struct device_record *rec;
5069         int ret = 0;
5070
5071         ptr = btrfs_item_ptr(eb,
5072                 slot, struct btrfs_dev_item);
5073
5074         rec = malloc(sizeof(*rec));
5075         if (!rec) {
5076                 fprintf(stderr, "memory allocation failed\n");
5077                 return -ENOMEM;
5078         }
5079
5080         rec->devid = key->offset;
5081         rec->generation = btrfs_header_generation(eb);
5082
5083         rec->objectid = key->objectid;
5084         rec->type = key->type;
5085         rec->offset = key->offset;
5086
5087         rec->devid = btrfs_device_id(eb, ptr);
5088         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5089         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5090
5091         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5092         if (ret) {
5093                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5094                 free(rec);
5095         }
5096
5097         return ret;
5098 }
5099
5100 struct block_group_record *
5101 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5102                              int slot)
5103 {
5104         struct btrfs_block_group_item *ptr;
5105         struct block_group_record *rec;
5106
5107         rec = calloc(1, sizeof(*rec));
5108         if (!rec) {
5109                 fprintf(stderr, "memory allocation failed\n");
5110                 exit(-1);
5111         }
5112
5113         rec->cache.start = key->objectid;
5114         rec->cache.size = key->offset;
5115
5116         rec->generation = btrfs_header_generation(leaf);
5117
5118         rec->objectid = key->objectid;
5119         rec->type = key->type;
5120         rec->offset = key->offset;
5121
5122         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5123         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5124
5125         INIT_LIST_HEAD(&rec->list);
5126
5127         return rec;
5128 }
5129
5130 static int process_block_group_item(struct block_group_tree *block_group_cache,
5131                                     struct btrfs_key *key,
5132                                     struct extent_buffer *eb, int slot)
5133 {
5134         struct block_group_record *rec;
5135         int ret = 0;
5136
5137         rec = btrfs_new_block_group_record(eb, key, slot);
5138         ret = insert_block_group_record(block_group_cache, rec);
5139         if (ret) {
5140                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5141                         rec->objectid, rec->offset);
5142                 free(rec);
5143         }
5144
5145         return ret;
5146 }
5147
5148 struct device_extent_record *
5149 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5150                                struct btrfs_key *key, int slot)
5151 {
5152         struct device_extent_record *rec;
5153         struct btrfs_dev_extent *ptr;
5154
5155         rec = calloc(1, sizeof(*rec));
5156         if (!rec) {
5157                 fprintf(stderr, "memory allocation failed\n");
5158                 exit(-1);
5159         }
5160
5161         rec->cache.objectid = key->objectid;
5162         rec->cache.start = key->offset;
5163
5164         rec->generation = btrfs_header_generation(leaf);
5165
5166         rec->objectid = key->objectid;
5167         rec->type = key->type;
5168         rec->offset = key->offset;
5169
5170         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5171         rec->chunk_objecteid =
5172                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5173         rec->chunk_offset =
5174                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5175         rec->length = btrfs_dev_extent_length(leaf, ptr);
5176         rec->cache.size = rec->length;
5177
5178         INIT_LIST_HEAD(&rec->chunk_list);
5179         INIT_LIST_HEAD(&rec->device_list);
5180
5181         return rec;
5182 }
5183
5184 static int
5185 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5186                            struct btrfs_key *key, struct extent_buffer *eb,
5187                            int slot)
5188 {
5189         struct device_extent_record *rec;
5190         int ret;
5191
5192         rec = btrfs_new_device_extent_record(eb, key, slot);
5193         ret = insert_device_extent_record(dev_extent_cache, rec);
5194         if (ret) {
5195                 fprintf(stderr,
5196                         "Device extent[%llu, %llu, %llu] existed.\n",
5197                         rec->objectid, rec->offset, rec->length);
5198                 free(rec);
5199         }
5200
5201         return ret;
5202 }
5203
5204 static int process_extent_item(struct btrfs_root *root,
5205                                struct cache_tree *extent_cache,
5206                                struct extent_buffer *eb, int slot)
5207 {
5208         struct btrfs_extent_item *ei;
5209         struct btrfs_extent_inline_ref *iref;
5210         struct btrfs_extent_data_ref *dref;
5211         struct btrfs_shared_data_ref *sref;
5212         struct btrfs_key key;
5213         unsigned long end;
5214         unsigned long ptr;
5215         int type;
5216         u32 item_size = btrfs_item_size_nr(eb, slot);
5217         u64 refs = 0;
5218         u64 offset;
5219         u64 num_bytes;
5220         int metadata = 0;
5221
5222         btrfs_item_key_to_cpu(eb, &key, slot);
5223
5224         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5225                 metadata = 1;
5226                 num_bytes = root->nodesize;
5227         } else {
5228                 num_bytes = key.offset;
5229         }
5230
5231         if (item_size < sizeof(*ei)) {
5232 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5233                 struct btrfs_extent_item_v0 *ei0;
5234                 BUG_ON(item_size != sizeof(*ei0));
5235                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5236                 refs = btrfs_extent_refs_v0(eb, ei0);
5237 #else
5238                 BUG();
5239 #endif
5240                 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5241                                       num_bytes, refs, 0, 0, 0, metadata, 1,
5242                                       num_bytes);
5243         }
5244
5245         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5246         refs = btrfs_extent_refs(eb, ei);
5247         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5248                 metadata = 1;
5249         else
5250                 metadata = 0;
5251
5252         add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5253                        refs, 0, 0, 0, metadata, 1, num_bytes);
5254
5255         ptr = (unsigned long)(ei + 1);
5256         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5257             key.type == BTRFS_EXTENT_ITEM_KEY)
5258                 ptr += sizeof(struct btrfs_tree_block_info);
5259
5260         end = (unsigned long)ei + item_size;
5261         while (ptr < end) {
5262                 iref = (struct btrfs_extent_inline_ref *)ptr;
5263                 type = btrfs_extent_inline_ref_type(eb, iref);
5264                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5265                 switch (type) {
5266                 case BTRFS_TREE_BLOCK_REF_KEY:
5267                         add_tree_backref(extent_cache, key.objectid,
5268                                          0, offset, 0);
5269                         break;
5270                 case BTRFS_SHARED_BLOCK_REF_KEY:
5271                         add_tree_backref(extent_cache, key.objectid,
5272                                          offset, 0, 0);
5273                         break;
5274                 case BTRFS_EXTENT_DATA_REF_KEY:
5275                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5276                         add_data_backref(extent_cache, key.objectid, 0,
5277                                         btrfs_extent_data_ref_root(eb, dref),
5278                                         btrfs_extent_data_ref_objectid(eb,
5279                                                                        dref),
5280                                         btrfs_extent_data_ref_offset(eb, dref),
5281                                         btrfs_extent_data_ref_count(eb, dref),
5282                                         0, num_bytes);
5283                         break;
5284                 case BTRFS_SHARED_DATA_REF_KEY:
5285                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5286                         add_data_backref(extent_cache, key.objectid, offset,
5287                                         0, 0, 0,
5288                                         btrfs_shared_data_ref_count(eb, sref),
5289                                         0, num_bytes);
5290                         break;
5291                 default:
5292                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5293                                 key.objectid, key.type, num_bytes);
5294                         goto out;
5295                 }
5296                 ptr += btrfs_extent_inline_ref_size(type);
5297         }
5298         WARN_ON(ptr > end);
5299 out:
5300         return 0;
5301 }
5302
5303 static int check_cache_range(struct btrfs_root *root,
5304                              struct btrfs_block_group_cache *cache,
5305                              u64 offset, u64 bytes)
5306 {
5307         struct btrfs_free_space *entry;
5308         u64 *logical;
5309         u64 bytenr;
5310         int stripe_len;
5311         int i, nr, ret;
5312
5313         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5314                 bytenr = btrfs_sb_offset(i);
5315                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5316                                        cache->key.objectid, bytenr, 0,
5317                                        &logical, &nr, &stripe_len);
5318                 if (ret)
5319                         return ret;
5320
5321                 while (nr--) {
5322                         if (logical[nr] + stripe_len <= offset)
5323                                 continue;
5324                         if (offset + bytes <= logical[nr])
5325                                 continue;
5326                         if (logical[nr] == offset) {
5327                                 if (stripe_len >= bytes) {
5328                                         kfree(logical);
5329                                         return 0;
5330                                 }
5331                                 bytes -= stripe_len;
5332                                 offset += stripe_len;
5333                         } else if (logical[nr] < offset) {
5334                                 if (logical[nr] + stripe_len >=
5335                                     offset + bytes) {
5336                                         kfree(logical);
5337                                         return 0;
5338                                 }
5339                                 bytes = (offset + bytes) -
5340                                         (logical[nr] + stripe_len);
5341                                 offset = logical[nr] + stripe_len;
5342                         } else {
5343                                 /*
5344                                  * Could be tricky, the super may land in the
5345                                  * middle of the area we're checking.  First
5346                                  * check the easiest case, it's at the end.
5347                                  */
5348                                 if (logical[nr] + stripe_len >=
5349                                     bytes + offset) {
5350                                         bytes = logical[nr] - offset;
5351                                         continue;
5352                                 }
5353
5354                                 /* Check the left side */
5355                                 ret = check_cache_range(root, cache,
5356                                                         offset,
5357                                                         logical[nr] - offset);
5358                                 if (ret) {
5359                                         kfree(logical);
5360                                         return ret;
5361                                 }
5362
5363                                 /* Now we continue with the right side */
5364                                 bytes = (offset + bytes) -
5365                                         (logical[nr] + stripe_len);
5366                                 offset = logical[nr] + stripe_len;
5367                         }
5368                 }
5369
5370                 kfree(logical);
5371         }
5372
5373         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5374         if (!entry) {
5375                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5376                         offset, offset+bytes);
5377                 return -EINVAL;
5378         }
5379
5380         if (entry->offset != offset) {
5381                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5382                         entry->offset);
5383                 return -EINVAL;
5384         }
5385
5386         if (entry->bytes != bytes) {
5387                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5388                         bytes, entry->bytes, offset);
5389                 return -EINVAL;
5390         }
5391
5392         unlink_free_space(cache->free_space_ctl, entry);
5393         free(entry);
5394         return 0;
5395 }
5396
5397 static int verify_space_cache(struct btrfs_root *root,
5398                               struct btrfs_block_group_cache *cache)
5399 {
5400         struct btrfs_path *path;
5401         struct extent_buffer *leaf;
5402         struct btrfs_key key;
5403         u64 last;
5404         int ret = 0;
5405
5406         path = btrfs_alloc_path();
5407         if (!path)
5408                 return -ENOMEM;
5409
5410         root = root->fs_info->extent_root;
5411
5412         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5413
5414         key.objectid = last;
5415         key.offset = 0;
5416         key.type = BTRFS_EXTENT_ITEM_KEY;
5417
5418         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5419         if (ret < 0)
5420                 goto out;
5421         ret = 0;
5422         while (1) {
5423                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5424                         ret = btrfs_next_leaf(root, path);
5425                         if (ret < 0)
5426                                 goto out;
5427                         if (ret > 0) {
5428                                 ret = 0;
5429                                 break;
5430                         }
5431                 }
5432                 leaf = path->nodes[0];
5433                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5434                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5435                         break;
5436                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5437                     key.type != BTRFS_METADATA_ITEM_KEY) {
5438                         path->slots[0]++;
5439                         continue;
5440                 }
5441
5442                 if (last == key.objectid) {
5443                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5444                                 last = key.objectid + key.offset;
5445                         else
5446                                 last = key.objectid + root->nodesize;
5447                         path->slots[0]++;
5448                         continue;
5449                 }
5450
5451                 ret = check_cache_range(root, cache, last,
5452                                         key.objectid - last);
5453                 if (ret)
5454                         break;
5455                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5456                         last = key.objectid + key.offset;
5457                 else
5458                         last = key.objectid + root->nodesize;
5459                 path->slots[0]++;
5460         }
5461
5462         if (last < cache->key.objectid + cache->key.offset)
5463                 ret = check_cache_range(root, cache, last,
5464                                         cache->key.objectid +
5465                                         cache->key.offset - last);
5466
5467 out:
5468         btrfs_free_path(path);
5469
5470         if (!ret &&
5471             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5472                 fprintf(stderr, "There are still entries left in the space "
5473                         "cache\n");
5474                 ret = -EINVAL;
5475         }
5476
5477         return ret;
5478 }
5479
5480 static int check_space_cache(struct btrfs_root *root)
5481 {
5482         struct btrfs_block_group_cache *cache;
5483         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5484         int ret;
5485         int error = 0;
5486
5487         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5488             btrfs_super_generation(root->fs_info->super_copy) !=
5489             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5490                 printf("cache and super generation don't match, space cache "
5491                        "will be invalidated\n");
5492                 return 0;
5493         }
5494
5495         if (ctx.progress_enabled) {
5496                 ctx.tp = TASK_FREE_SPACE;
5497                 task_start(ctx.info);
5498         }
5499
5500         while (1) {
5501                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5502                 if (!cache)
5503                         break;
5504
5505                 start = cache->key.objectid + cache->key.offset;
5506                 if (!cache->free_space_ctl) {
5507                         if (btrfs_init_free_space_ctl(cache,
5508                                                       root->sectorsize)) {
5509                                 ret = -ENOMEM;
5510                                 break;
5511                         }
5512                 } else {
5513                         btrfs_remove_free_space_cache(cache);
5514                 }
5515
5516                 if (btrfs_fs_compat_ro(root->fs_info,
5517                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5518                         ret = exclude_super_stripes(root, cache);
5519                         if (ret) {
5520                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5521                                         strerror(-ret));
5522                                 error++;
5523                                 continue;
5524                         }
5525                         ret = load_free_space_tree(root->fs_info, cache);
5526                         free_excluded_extents(root, cache);
5527                         if (ret < 0) {
5528                                 fprintf(stderr, "could not load free space tree: %s\n",
5529                                         strerror(-ret));
5530                                 error++;
5531                                 continue;
5532                         }
5533                         error += ret;
5534                 } else {
5535                         ret = load_free_space_cache(root->fs_info, cache);
5536                         if (!ret)
5537                                 continue;
5538                 }
5539
5540                 ret = verify_space_cache(root, cache);
5541                 if (ret) {
5542                         fprintf(stderr, "cache appears valid but isnt %Lu\n",
5543                                 cache->key.objectid);
5544                         error++;
5545                 }
5546         }
5547
5548         task_stop(ctx.info);
5549
5550         return error ? -EINVAL : 0;
5551 }
5552
5553 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5554                         u64 num_bytes, unsigned long leaf_offset,
5555                         struct extent_buffer *eb) {
5556
5557         u64 offset = 0;
5558         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5559         char *data;
5560         unsigned long csum_offset;
5561         u32 csum;
5562         u32 csum_expected;
5563         u64 read_len;
5564         u64 data_checked = 0;
5565         u64 tmp;
5566         int ret = 0;
5567         int mirror;
5568         int num_copies;
5569
5570         if (num_bytes % root->sectorsize)
5571                 return -EINVAL;
5572
5573         data = malloc(num_bytes);
5574         if (!data)
5575                 return -ENOMEM;
5576
5577         while (offset < num_bytes) {
5578                 mirror = 0;
5579 again:
5580                 read_len = num_bytes - offset;
5581                 /* read as much space once a time */
5582                 ret = read_extent_data(root, data + offset,
5583                                 bytenr + offset, &read_len, mirror);
5584                 if (ret)
5585                         goto out;
5586                 data_checked = 0;
5587                 /* verify every 4k data's checksum */
5588                 while (data_checked < read_len) {
5589                         csum = ~(u32)0;
5590                         tmp = offset + data_checked;
5591
5592                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5593                                                csum, root->sectorsize);
5594                         btrfs_csum_final(csum, (char *)&csum);
5595
5596                         csum_offset = leaf_offset +
5597                                  tmp / root->sectorsize * csum_size;
5598                         read_extent_buffer(eb, (char *)&csum_expected,
5599                                            csum_offset, csum_size);
5600                         /* try another mirror */
5601                         if (csum != csum_expected) {
5602                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5603                                                 mirror, bytenr + tmp,
5604                                                 csum, csum_expected);
5605                                 num_copies = btrfs_num_copies(
5606                                                 &root->fs_info->mapping_tree,
5607                                                 bytenr, num_bytes);
5608                                 if (mirror < num_copies - 1) {
5609                                         mirror += 1;
5610                                         goto again;
5611                                 }
5612                         }
5613                         data_checked += root->sectorsize;
5614                 }
5615                 offset += read_len;
5616         }
5617 out:
5618         free(data);
5619         return ret;
5620 }
5621
5622 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5623                                u64 num_bytes)
5624 {
5625         struct btrfs_path *path;
5626         struct extent_buffer *leaf;
5627         struct btrfs_key key;
5628         int ret;
5629
5630         path = btrfs_alloc_path();
5631         if (!path) {
5632                 fprintf(stderr, "Error allocing path\n");
5633                 return -ENOMEM;
5634         }
5635
5636         key.objectid = bytenr;
5637         key.type = BTRFS_EXTENT_ITEM_KEY;
5638         key.offset = (u64)-1;
5639
5640 again:
5641         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5642                                 0, 0);
5643         if (ret < 0) {
5644                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5645                 btrfs_free_path(path);
5646                 return ret;
5647         } else if (ret) {
5648                 if (path->slots[0] > 0) {
5649                         path->slots[0]--;
5650                 } else {
5651                         ret = btrfs_prev_leaf(root, path);
5652                         if (ret < 0) {
5653                                 goto out;
5654                         } else if (ret > 0) {
5655                                 ret = 0;
5656                                 goto out;
5657                         }
5658                 }
5659         }
5660
5661         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5662
5663         /*
5664          * Block group items come before extent items if they have the same
5665          * bytenr, so walk back one more just in case.  Dear future traveler,
5666          * first congrats on mastering time travel.  Now if it's not too much
5667          * trouble could you go back to 2006 and tell Chris to make the
5668          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5669          * EXTENT_ITEM_KEY please?
5670          */
5671         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5672                 if (path->slots[0] > 0) {
5673                         path->slots[0]--;
5674                 } else {
5675                         ret = btrfs_prev_leaf(root, path);
5676                         if (ret < 0) {
5677                                 goto out;
5678                         } else if (ret > 0) {
5679                                 ret = 0;
5680                                 goto out;
5681                         }
5682                 }
5683                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5684         }
5685
5686         while (num_bytes) {
5687                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5688                         ret = btrfs_next_leaf(root, path);
5689                         if (ret < 0) {
5690                                 fprintf(stderr, "Error going to next leaf "
5691                                         "%d\n", ret);
5692                                 btrfs_free_path(path);
5693                                 return ret;
5694                         } else if (ret) {
5695                                 break;
5696                         }
5697                 }
5698                 leaf = path->nodes[0];
5699                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5700                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5701                         path->slots[0]++;
5702                         continue;
5703                 }
5704                 if (key.objectid + key.offset < bytenr) {
5705                         path->slots[0]++;
5706                         continue;
5707                 }
5708                 if (key.objectid > bytenr + num_bytes)
5709                         break;
5710
5711                 if (key.objectid == bytenr) {
5712                         if (key.offset >= num_bytes) {
5713                                 num_bytes = 0;
5714                                 break;
5715                         }
5716                         num_bytes -= key.offset;
5717                         bytenr += key.offset;
5718                 } else if (key.objectid < bytenr) {
5719                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5720                                 num_bytes = 0;
5721                                 break;
5722                         }
5723                         num_bytes = (bytenr + num_bytes) -
5724                                 (key.objectid + key.offset);
5725                         bytenr = key.objectid + key.offset;
5726                 } else {
5727                         if (key.objectid + key.offset < bytenr + num_bytes) {
5728                                 u64 new_start = key.objectid + key.offset;
5729                                 u64 new_bytes = bytenr + num_bytes - new_start;
5730
5731                                 /*
5732                                  * Weird case, the extent is in the middle of
5733                                  * our range, we'll have to search one side
5734                                  * and then the other.  Not sure if this happens
5735                                  * in real life, but no harm in coding it up
5736                                  * anyway just in case.
5737                                  */
5738                                 btrfs_release_path(path);
5739                                 ret = check_extent_exists(root, new_start,
5740                                                           new_bytes);
5741                                 if (ret) {
5742                                         fprintf(stderr, "Right section didn't "
5743                                                 "have a record\n");
5744                                         break;
5745                                 }
5746                                 num_bytes = key.objectid - bytenr;
5747                                 goto again;
5748                         }
5749                         num_bytes = key.objectid - bytenr;
5750                 }
5751                 path->slots[0]++;
5752         }
5753         ret = 0;
5754
5755 out:
5756         if (num_bytes && !ret) {
5757                 fprintf(stderr, "There are no extents for csum range "
5758                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5759                 ret = 1;
5760         }
5761
5762         btrfs_free_path(path);
5763         return ret;
5764 }
5765
5766 static int check_csums(struct btrfs_root *root)
5767 {
5768         struct btrfs_path *path;
5769         struct extent_buffer *leaf;
5770         struct btrfs_key key;
5771         u64 offset = 0, num_bytes = 0;
5772         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5773         int errors = 0;
5774         int ret;
5775         u64 data_len;
5776         unsigned long leaf_offset;
5777
5778         root = root->fs_info->csum_root;
5779         if (!extent_buffer_uptodate(root->node)) {
5780                 fprintf(stderr, "No valid csum tree found\n");
5781                 return -ENOENT;
5782         }
5783
5784         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5785         key.type = BTRFS_EXTENT_CSUM_KEY;
5786         key.offset = 0;
5787
5788         path = btrfs_alloc_path();
5789         if (!path)
5790                 return -ENOMEM;
5791
5792         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5793         if (ret < 0) {
5794                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5795                 btrfs_free_path(path);
5796                 return ret;
5797         }
5798
5799         if (ret > 0 && path->slots[0])
5800                 path->slots[0]--;
5801         ret = 0;
5802
5803         while (1) {
5804                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5805                         ret = btrfs_next_leaf(root, path);
5806                         if (ret < 0) {
5807                                 fprintf(stderr, "Error going to next leaf "
5808                                         "%d\n", ret);
5809                                 break;
5810                         }
5811                         if (ret)
5812                                 break;
5813                 }
5814                 leaf = path->nodes[0];
5815
5816                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5817                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5818                         path->slots[0]++;
5819                         continue;
5820                 }
5821
5822                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5823                               csum_size) * root->sectorsize;
5824                 if (!check_data_csum)
5825                         goto skip_csum_check;
5826                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5827                 ret = check_extent_csums(root, key.offset, data_len,
5828                                          leaf_offset, leaf);
5829                 if (ret)
5830                         break;
5831 skip_csum_check:
5832                 if (!num_bytes) {
5833                         offset = key.offset;
5834                 } else if (key.offset != offset + num_bytes) {
5835                         ret = check_extent_exists(root, offset, num_bytes);
5836                         if (ret) {
5837                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5838                                         "there is no extent record\n",
5839                                         offset, offset+num_bytes);
5840                                 errors++;
5841                         }
5842                         offset = key.offset;
5843                         num_bytes = 0;
5844                 }
5845                 num_bytes += data_len;
5846                 path->slots[0]++;
5847         }
5848
5849         btrfs_free_path(path);
5850         return errors;
5851 }
5852
5853 static int is_dropped_key(struct btrfs_key *key,
5854                           struct btrfs_key *drop_key) {
5855         if (key->objectid < drop_key->objectid)
5856                 return 1;
5857         else if (key->objectid == drop_key->objectid) {
5858                 if (key->type < drop_key->type)
5859                         return 1;
5860                 else if (key->type == drop_key->type) {
5861                         if (key->offset < drop_key->offset)
5862                                 return 1;
5863                 }
5864         }
5865         return 0;
5866 }
5867
5868 /*
5869  * Here are the rules for FULL_BACKREF.
5870  *
5871  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5872  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5873  *      FULL_BACKREF set.
5874  * 3) We cow'ed the block walking down a reloc tree.  This is impossible to tell
5875  *    if it happened after the relocation occurred since we'll have dropped the
5876  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5877  *    have no real way to know for sure.
5878  *
5879  * We process the blocks one root at a time, and we start from the lowest root
5880  * objectid and go to the highest.  So we can just lookup the owner backref for
5881  * the record and if we don't find it then we know it doesn't exist and we have
5882  * a FULL BACKREF.
5883  *
5884  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5885  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5886  * be set or not and then we can check later once we've gathered all the refs.
5887  */
5888 static int calc_extent_flag(struct btrfs_root *root,
5889                            struct cache_tree *extent_cache,
5890                            struct extent_buffer *buf,
5891                            struct root_item_record *ri,
5892                            u64 *flags)
5893 {
5894         struct extent_record *rec;
5895         struct cache_extent *cache;
5896         struct tree_backref *tback;
5897         u64 owner = 0;
5898
5899         cache = lookup_cache_extent(extent_cache, buf->start, 1);
5900         /* we have added this extent before */
5901         BUG_ON(!cache);
5902         rec = container_of(cache, struct extent_record, cache);
5903
5904         /*
5905          * Except file/reloc tree, we can not have
5906          * FULL BACKREF MODE
5907          */
5908         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5909                 goto normal;
5910         /*
5911          * root node
5912          */
5913         if (buf->start == ri->bytenr)
5914                 goto normal;
5915
5916         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5917                 goto full_backref;
5918
5919         owner = btrfs_header_owner(buf);
5920         if (owner == ri->objectid)
5921                 goto normal;
5922
5923         tback = find_tree_backref(rec, 0, owner);
5924         if (!tback)
5925                 goto full_backref;
5926 normal:
5927         *flags = 0;
5928         if (rec->flag_block_full_backref != -1 &&
5929             rec->flag_block_full_backref != 0)
5930                 rec->bad_full_backref = 1;
5931         return 0;
5932 full_backref:
5933         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5934         if (rec->flag_block_full_backref != -1 &&
5935             rec->flag_block_full_backref != 1)
5936                 rec->bad_full_backref = 1;
5937         return 0;
5938 }
5939
5940 static int run_next_block(struct btrfs_root *root,
5941                           struct block_info *bits,
5942                           int bits_nr,
5943                           u64 *last,
5944                           struct cache_tree *pending,
5945                           struct cache_tree *seen,
5946                           struct cache_tree *reada,
5947                           struct cache_tree *nodes,
5948                           struct cache_tree *extent_cache,
5949                           struct cache_tree *chunk_cache,
5950                           struct rb_root *dev_cache,
5951                           struct block_group_tree *block_group_cache,
5952                           struct device_extent_tree *dev_extent_cache,
5953                           struct root_item_record *ri)
5954 {
5955         struct extent_buffer *buf;
5956         struct extent_record *rec = NULL;
5957         u64 bytenr;
5958         u32 size;
5959         u64 parent;
5960         u64 owner;
5961         u64 flags;
5962         u64 ptr;
5963         u64 gen = 0;
5964         int ret = 0;
5965         int i;
5966         int nritems;
5967         struct btrfs_key key;
5968         struct cache_extent *cache;
5969         int reada_bits;
5970
5971         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5972                                     bits_nr, &reada_bits);
5973         if (nritems == 0)
5974                 return 1;
5975
5976         if (!reada_bits) {
5977                 for(i = 0; i < nritems; i++) {
5978                         ret = add_cache_extent(reada, bits[i].start,
5979                                                bits[i].size);
5980                         if (ret == -EEXIST)
5981                                 continue;
5982
5983                         /* fixme, get the parent transid */
5984                         readahead_tree_block(root, bits[i].start,
5985                                              bits[i].size, 0);
5986                 }
5987         }
5988         *last = bits[0].start;
5989         bytenr = bits[0].start;
5990         size = bits[0].size;
5991
5992         cache = lookup_cache_extent(pending, bytenr, size);
5993         if (cache) {
5994                 remove_cache_extent(pending, cache);
5995                 free(cache);
5996         }
5997         cache = lookup_cache_extent(reada, bytenr, size);
5998         if (cache) {
5999                 remove_cache_extent(reada, cache);
6000                 free(cache);
6001         }
6002         cache = lookup_cache_extent(nodes, bytenr, size);
6003         if (cache) {
6004                 remove_cache_extent(nodes, cache);
6005                 free(cache);
6006         }
6007         cache = lookup_cache_extent(extent_cache, bytenr, size);
6008         if (cache) {
6009                 rec = container_of(cache, struct extent_record, cache);
6010                 gen = rec->parent_generation;
6011         }
6012
6013         /* fixme, get the real parent transid */
6014         buf = read_tree_block(root, bytenr, size, gen);
6015         if (!extent_buffer_uptodate(buf)) {
6016                 record_bad_block_io(root->fs_info,
6017                                     extent_cache, bytenr, size);
6018                 goto out;
6019         }
6020
6021         nritems = btrfs_header_nritems(buf);
6022
6023         flags = 0;
6024         if (!init_extent_tree) {
6025                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6026                                        btrfs_header_level(buf), 1, NULL,
6027                                        &flags);
6028                 if (ret < 0) {
6029                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6030                         if (ret < 0) {
6031                                 fprintf(stderr, "Couldn't calc extent flags\n");
6032                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6033                         }
6034                 }
6035         } else {
6036                 flags = 0;
6037                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6038                 if (ret < 0) {
6039                         fprintf(stderr, "Couldn't calc extent flags\n");
6040                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6041                 }
6042         }
6043
6044         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6045                 if (ri != NULL &&
6046                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6047                     ri->objectid == btrfs_header_owner(buf)) {
6048                         /*
6049                          * Ok we got to this block from it's original owner and
6050                          * we have FULL_BACKREF set.  Relocation can leave
6051                          * converted blocks over so this is altogether possible,
6052                          * however it's not possible if the generation > the
6053                          * last snapshot, so check for this case.
6054                          */
6055                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6056                             btrfs_header_generation(buf) > ri->last_snapshot) {
6057                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6058                                 rec->bad_full_backref = 1;
6059                         }
6060                 }
6061         } else {
6062                 if (ri != NULL &&
6063                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6064                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6065                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6066                         rec->bad_full_backref = 1;
6067                 }
6068         }
6069
6070         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6071                 rec->flag_block_full_backref = 1;
6072                 parent = bytenr;
6073                 owner = 0;
6074         } else {
6075                 rec->flag_block_full_backref = 0;
6076                 parent = 0;
6077                 owner = btrfs_header_owner(buf);
6078         }
6079
6080         ret = check_block(root, extent_cache, buf, flags);
6081         if (ret)
6082                 goto out;
6083
6084         if (btrfs_is_leaf(buf)) {
6085                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6086                 for (i = 0; i < nritems; i++) {
6087                         struct btrfs_file_extent_item *fi;
6088                         btrfs_item_key_to_cpu(buf, &key, i);
6089                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6090                                 process_extent_item(root, extent_cache, buf,
6091                                                     i);
6092                                 continue;
6093                         }
6094                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6095                                 process_extent_item(root, extent_cache, buf,
6096                                                     i);
6097                                 continue;
6098                         }
6099                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6100                                 total_csum_bytes +=
6101                                         btrfs_item_size_nr(buf, i);
6102                                 continue;
6103                         }
6104                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6105                                 process_chunk_item(chunk_cache, &key, buf, i);
6106                                 continue;
6107                         }
6108                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6109                                 process_device_item(dev_cache, &key, buf, i);
6110                                 continue;
6111                         }
6112                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6113                                 process_block_group_item(block_group_cache,
6114                                         &key, buf, i);
6115                                 continue;
6116                         }
6117                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6118                                 process_device_extent_item(dev_extent_cache,
6119                                         &key, buf, i);
6120                                 continue;
6121
6122                         }
6123                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6124 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6125                                 process_extent_ref_v0(extent_cache, buf, i);
6126 #else
6127                                 BUG();
6128 #endif
6129                                 continue;
6130                         }
6131
6132                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6133                                 add_tree_backref(extent_cache, key.objectid, 0,
6134                                                  key.offset, 0);
6135                                 continue;
6136                         }
6137                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6138                                 add_tree_backref(extent_cache, key.objectid,
6139                                                  key.offset, 0, 0);
6140                                 continue;
6141                         }
6142                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6143                                 struct btrfs_extent_data_ref *ref;
6144                                 ref = btrfs_item_ptr(buf, i,
6145                                                 struct btrfs_extent_data_ref);
6146                                 add_data_backref(extent_cache,
6147                                         key.objectid, 0,
6148                                         btrfs_extent_data_ref_root(buf, ref),
6149                                         btrfs_extent_data_ref_objectid(buf,
6150                                                                        ref),
6151                                         btrfs_extent_data_ref_offset(buf, ref),
6152                                         btrfs_extent_data_ref_count(buf, ref),
6153                                         0, root->sectorsize);
6154                                 continue;
6155                         }
6156                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6157                                 struct btrfs_shared_data_ref *ref;
6158                                 ref = btrfs_item_ptr(buf, i,
6159                                                 struct btrfs_shared_data_ref);
6160                                 add_data_backref(extent_cache,
6161                                         key.objectid, key.offset, 0, 0, 0,
6162                                         btrfs_shared_data_ref_count(buf, ref),
6163                                         0, root->sectorsize);
6164                                 continue;
6165                         }
6166                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6167                                 struct bad_item *bad;
6168
6169                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6170                                         continue;
6171                                 if (!owner)
6172                                         continue;
6173                                 bad = malloc(sizeof(struct bad_item));
6174                                 if (!bad)
6175                                         continue;
6176                                 INIT_LIST_HEAD(&bad->list);
6177                                 memcpy(&bad->key, &key,
6178                                        sizeof(struct btrfs_key));
6179                                 bad->root_id = owner;
6180                                 list_add_tail(&bad->list, &delete_items);
6181                                 continue;
6182                         }
6183                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6184                                 continue;
6185                         fi = btrfs_item_ptr(buf, i,
6186                                             struct btrfs_file_extent_item);
6187                         if (btrfs_file_extent_type(buf, fi) ==
6188                             BTRFS_FILE_EXTENT_INLINE)
6189                                 continue;
6190                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6191                                 continue;
6192
6193                         data_bytes_allocated +=
6194                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6195                         if (data_bytes_allocated < root->sectorsize) {
6196                                 abort();
6197                         }
6198                         data_bytes_referenced +=
6199                                 btrfs_file_extent_num_bytes(buf, fi);
6200                         add_data_backref(extent_cache,
6201                                 btrfs_file_extent_disk_bytenr(buf, fi),
6202                                 parent, owner, key.objectid, key.offset -
6203                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6204                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6205                 }
6206         } else {
6207                 int level;
6208                 struct btrfs_key first_key;
6209
6210                 first_key.objectid = 0;
6211
6212                 if (nritems > 0)
6213                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6214                 level = btrfs_header_level(buf);
6215                 for (i = 0; i < nritems; i++) {
6216                         ptr = btrfs_node_blockptr(buf, i);
6217                         size = root->nodesize;
6218                         btrfs_node_key_to_cpu(buf, &key, i);
6219                         if (ri != NULL) {
6220                                 if ((level == ri->drop_level)
6221                                     && is_dropped_key(&key, &ri->drop_key)) {
6222                                         continue;
6223                                 }
6224                         }
6225                         ret = add_extent_rec(extent_cache, &key,
6226                                              btrfs_node_ptr_generation(buf, i),
6227                                              ptr, size, 0, 0, 1, 0, 1, 0,
6228                                              size);
6229                         BUG_ON(ret);
6230
6231                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6232
6233                         if (level > 1) {
6234                                 add_pending(nodes, seen, ptr, size);
6235                         } else {
6236                                 add_pending(pending, seen, ptr, size);
6237                         }
6238                 }
6239                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6240                                       nritems) * sizeof(struct btrfs_key_ptr);
6241         }
6242         total_btree_bytes += buf->len;
6243         if (fs_root_objectid(btrfs_header_owner(buf)))
6244                 total_fs_tree_bytes += buf->len;
6245         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6246                 total_extent_tree_bytes += buf->len;
6247         if (!found_old_backref &&
6248             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6249             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6250             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6251                 found_old_backref = 1;
6252 out:
6253         free_extent_buffer(buf);
6254         return ret;
6255 }
6256
6257 static int add_root_to_pending(struct extent_buffer *buf,
6258                                struct cache_tree *extent_cache,
6259                                struct cache_tree *pending,
6260                                struct cache_tree *seen,
6261                                struct cache_tree *nodes,
6262                                u64 objectid)
6263 {
6264         if (btrfs_header_level(buf) > 0)
6265                 add_pending(nodes, seen, buf->start, buf->len);
6266         else
6267                 add_pending(pending, seen, buf->start, buf->len);
6268         add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6269                        0, 1, 1, 0, 1, 0, buf->len);
6270
6271         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6272             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6273                 add_tree_backref(extent_cache, buf->start, buf->start,
6274                                  0, 1);
6275         else
6276                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6277         return 0;
6278 }
6279
6280 /* as we fix the tree, we might be deleting blocks that
6281  * we're tracking for repair.  This hook makes sure we
6282  * remove any backrefs for blocks as we are fixing them.
6283  */
6284 static int free_extent_hook(struct btrfs_trans_handle *trans,
6285                             struct btrfs_root *root,
6286                             u64 bytenr, u64 num_bytes, u64 parent,
6287                             u64 root_objectid, u64 owner, u64 offset,
6288                             int refs_to_drop)
6289 {
6290         struct extent_record *rec;
6291         struct cache_extent *cache;
6292         int is_data;
6293         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6294
6295         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6296         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6297         if (!cache)
6298                 return 0;
6299
6300         rec = container_of(cache, struct extent_record, cache);
6301         if (is_data) {
6302                 struct data_backref *back;
6303                 back = find_data_backref(rec, parent, root_objectid, owner,
6304                                          offset, 1, bytenr, num_bytes);
6305                 if (!back)
6306                         goto out;
6307                 if (back->node.found_ref) {
6308                         back->found_ref -= refs_to_drop;
6309                         if (rec->refs)
6310                                 rec->refs -= refs_to_drop;
6311                 }
6312                 if (back->node.found_extent_tree) {
6313                         back->num_refs -= refs_to_drop;
6314                         if (rec->extent_item_refs)
6315                                 rec->extent_item_refs -= refs_to_drop;
6316                 }
6317                 if (back->found_ref == 0)
6318                         back->node.found_ref = 0;
6319                 if (back->num_refs == 0)
6320                         back->node.found_extent_tree = 0;
6321
6322                 if (!back->node.found_extent_tree && back->node.found_ref) {
6323                         list_del(&back->node.list);
6324                         free(back);
6325                 }
6326         } else {
6327                 struct tree_backref *back;
6328                 back = find_tree_backref(rec, parent, root_objectid);
6329                 if (!back)
6330                         goto out;
6331                 if (back->node.found_ref) {
6332                         if (rec->refs)
6333                                 rec->refs--;
6334                         back->node.found_ref = 0;
6335                 }
6336                 if (back->node.found_extent_tree) {
6337                         if (rec->extent_item_refs)
6338                                 rec->extent_item_refs--;
6339                         back->node.found_extent_tree = 0;
6340                 }
6341                 if (!back->node.found_extent_tree && back->node.found_ref) {
6342                         list_del(&back->node.list);
6343                         free(back);
6344                 }
6345         }
6346         maybe_free_extent_rec(extent_cache, rec);
6347 out:
6348         return 0;
6349 }
6350
6351 static int delete_extent_records(struct btrfs_trans_handle *trans,
6352                                  struct btrfs_root *root,
6353                                  struct btrfs_path *path,
6354                                  u64 bytenr, u64 new_len)
6355 {
6356         struct btrfs_key key;
6357         struct btrfs_key found_key;
6358         struct extent_buffer *leaf;
6359         int ret;
6360         int slot;
6361
6362
6363         key.objectid = bytenr;
6364         key.type = (u8)-1;
6365         key.offset = (u64)-1;
6366
6367         while(1) {
6368                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6369                                         &key, path, 0, 1);
6370                 if (ret < 0)
6371                         break;
6372
6373                 if (ret > 0) {
6374                         ret = 0;
6375                         if (path->slots[0] == 0)
6376                                 break;
6377                         path->slots[0]--;
6378                 }
6379                 ret = 0;
6380
6381                 leaf = path->nodes[0];
6382                 slot = path->slots[0];
6383
6384                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6385                 if (found_key.objectid != bytenr)
6386                         break;
6387
6388                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6389                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6390                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6391                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6392                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6393                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6394                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6395                         btrfs_release_path(path);
6396                         if (found_key.type == 0) {
6397                                 if (found_key.offset == 0)
6398                                         break;
6399                                 key.offset = found_key.offset - 1;
6400                                 key.type = found_key.type;
6401                         }
6402                         key.type = found_key.type - 1;
6403                         key.offset = (u64)-1;
6404                         continue;
6405                 }
6406
6407                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6408                         found_key.objectid, found_key.type, found_key.offset);
6409
6410                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6411                 if (ret)
6412                         break;
6413                 btrfs_release_path(path);
6414
6415                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6416                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6417                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6418                                 found_key.offset : root->nodesize;
6419
6420                         ret = btrfs_update_block_group(trans, root, bytenr,
6421                                                        bytes, 0, 0);
6422                         if (ret)
6423                                 break;
6424                 }
6425         }
6426
6427         btrfs_release_path(path);
6428         return ret;
6429 }
6430
6431 /*
6432  * for a single backref, this will allocate a new extent
6433  * and add the backref to it.
6434  */
6435 static int record_extent(struct btrfs_trans_handle *trans,
6436                          struct btrfs_fs_info *info,
6437                          struct btrfs_path *path,
6438                          struct extent_record *rec,
6439                          struct extent_backref *back,
6440                          int allocated, u64 flags)
6441 {
6442         int ret;
6443         struct btrfs_root *extent_root = info->extent_root;
6444         struct extent_buffer *leaf;
6445         struct btrfs_key ins_key;
6446         struct btrfs_extent_item *ei;
6447         struct tree_backref *tback;
6448         struct data_backref *dback;
6449         struct btrfs_tree_block_info *bi;
6450
6451         if (!back->is_data)
6452                 rec->max_size = max_t(u64, rec->max_size,
6453                                     info->extent_root->nodesize);
6454
6455         if (!allocated) {
6456                 u32 item_size = sizeof(*ei);
6457
6458                 if (!back->is_data)
6459                         item_size += sizeof(*bi);
6460
6461                 ins_key.objectid = rec->start;
6462                 ins_key.offset = rec->max_size;
6463                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6464
6465                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6466                                         &ins_key, item_size);
6467                 if (ret)
6468                         goto fail;
6469
6470                 leaf = path->nodes[0];
6471                 ei = btrfs_item_ptr(leaf, path->slots[0],
6472                                     struct btrfs_extent_item);
6473
6474                 btrfs_set_extent_refs(leaf, ei, 0);
6475                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6476
6477                 if (back->is_data) {
6478                         btrfs_set_extent_flags(leaf, ei,
6479                                                BTRFS_EXTENT_FLAG_DATA);
6480                 } else {
6481                         struct btrfs_disk_key copy_key;;
6482
6483                         tback = (struct tree_backref *)back;
6484                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6485                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6486                                              sizeof(*bi));
6487
6488                         btrfs_set_disk_key_objectid(&copy_key,
6489                                                     rec->info_objectid);
6490                         btrfs_set_disk_key_type(&copy_key, 0);
6491                         btrfs_set_disk_key_offset(&copy_key, 0);
6492
6493                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6494                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6495
6496                         btrfs_set_extent_flags(leaf, ei,
6497                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6498                 }
6499
6500                 btrfs_mark_buffer_dirty(leaf);
6501                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6502                                                rec->max_size, 1, 0);
6503                 if (ret)
6504                         goto fail;
6505                 btrfs_release_path(path);
6506         }
6507
6508         if (back->is_data) {
6509                 u64 parent;
6510                 int i;
6511
6512                 dback = (struct data_backref *)back;
6513                 if (back->full_backref)
6514                         parent = dback->parent;
6515                 else
6516                         parent = 0;
6517
6518                 for (i = 0; i < dback->found_ref; i++) {
6519                         /* if parent != 0, we're doing a full backref
6520                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6521                          * just makes the backref allocator create a data
6522                          * backref
6523                          */
6524                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6525                                                    rec->start, rec->max_size,
6526                                                    parent,
6527                                                    dback->root,
6528                                                    parent ?
6529                                                    BTRFS_FIRST_FREE_OBJECTID :
6530                                                    dback->owner,
6531                                                    dback->offset);
6532                         if (ret)
6533                                 break;
6534                 }
6535                 fprintf(stderr, "adding new data backref"
6536                                 " on %llu %s %llu owner %llu"
6537                                 " offset %llu found %d\n",
6538                                 (unsigned long long)rec->start,
6539                                 back->full_backref ?
6540                                 "parent" : "root",
6541                                 back->full_backref ?
6542                                 (unsigned long long)parent :
6543                                 (unsigned long long)dback->root,
6544                                 (unsigned long long)dback->owner,
6545                                 (unsigned long long)dback->offset,
6546                                 dback->found_ref);
6547         } else {
6548                 u64 parent;
6549
6550                 tback = (struct tree_backref *)back;
6551                 if (back->full_backref)
6552                         parent = tback->parent;
6553                 else
6554                         parent = 0;
6555
6556                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6557                                            rec->start, rec->max_size,
6558                                            parent, tback->root, 0, 0);
6559                 fprintf(stderr, "adding new tree backref on "
6560                         "start %llu len %llu parent %llu root %llu\n",
6561                         rec->start, rec->max_size, parent, tback->root);
6562         }
6563 fail:
6564         btrfs_release_path(path);
6565         return ret;
6566 }
6567
6568 static struct extent_entry *find_entry(struct list_head *entries,
6569                                        u64 bytenr, u64 bytes)
6570 {
6571         struct extent_entry *entry = NULL;
6572
6573         list_for_each_entry(entry, entries, list) {
6574                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6575                         return entry;
6576         }
6577
6578         return NULL;
6579 }
6580
6581 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6582 {
6583         struct extent_entry *entry, *best = NULL, *prev = NULL;
6584
6585         list_for_each_entry(entry, entries, list) {
6586                 if (!prev) {
6587                         prev = entry;
6588                         continue;
6589                 }
6590
6591                 /*
6592                  * If there are as many broken entries as entries then we know
6593                  * not to trust this particular entry.
6594                  */
6595                 if (entry->broken == entry->count)
6596                         continue;
6597
6598                 /*
6599                  * If our current entry == best then we can't be sure our best
6600                  * is really the best, so we need to keep searching.
6601                  */
6602                 if (best && best->count == entry->count) {
6603                         prev = entry;
6604                         best = NULL;
6605                         continue;
6606                 }
6607
6608                 /* Prev == entry, not good enough, have to keep searching */
6609                 if (!prev->broken && prev->count == entry->count)
6610                         continue;
6611
6612                 if (!best)
6613                         best = (prev->count > entry->count) ? prev : entry;
6614                 else if (best->count < entry->count)
6615                         best = entry;
6616                 prev = entry;
6617         }
6618
6619         return best;
6620 }
6621
6622 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6623                       struct data_backref *dback, struct extent_entry *entry)
6624 {
6625         struct btrfs_trans_handle *trans;
6626         struct btrfs_root *root;
6627         struct btrfs_file_extent_item *fi;
6628         struct extent_buffer *leaf;
6629         struct btrfs_key key;
6630         u64 bytenr, bytes;
6631         int ret, err;
6632
6633         key.objectid = dback->root;
6634         key.type = BTRFS_ROOT_ITEM_KEY;
6635         key.offset = (u64)-1;
6636         root = btrfs_read_fs_root(info, &key);
6637         if (IS_ERR(root)) {
6638                 fprintf(stderr, "Couldn't find root for our ref\n");
6639                 return -EINVAL;
6640         }
6641
6642         /*
6643          * The backref points to the original offset of the extent if it was
6644          * split, so we need to search down to the offset we have and then walk
6645          * forward until we find the backref we're looking for.
6646          */
6647         key.objectid = dback->owner;
6648         key.type = BTRFS_EXTENT_DATA_KEY;
6649         key.offset = dback->offset;
6650         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6651         if (ret < 0) {
6652                 fprintf(stderr, "Error looking up ref %d\n", ret);
6653                 return ret;
6654         }
6655
6656         while (1) {
6657                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6658                         ret = btrfs_next_leaf(root, path);
6659                         if (ret) {
6660                                 fprintf(stderr, "Couldn't find our ref, next\n");
6661                                 return -EINVAL;
6662                         }
6663                 }
6664                 leaf = path->nodes[0];
6665                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6666                 if (key.objectid != dback->owner ||
6667                     key.type != BTRFS_EXTENT_DATA_KEY) {
6668                         fprintf(stderr, "Couldn't find our ref, search\n");
6669                         return -EINVAL;
6670                 }
6671                 fi = btrfs_item_ptr(leaf, path->slots[0],
6672                                     struct btrfs_file_extent_item);
6673                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6674                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6675
6676                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6677                         break;
6678                 path->slots[0]++;
6679         }
6680
6681         btrfs_release_path(path);
6682
6683         trans = btrfs_start_transaction(root, 1);
6684         if (IS_ERR(trans))
6685                 return PTR_ERR(trans);
6686
6687         /*
6688          * Ok we have the key of the file extent we want to fix, now we can cow
6689          * down to the thing and fix it.
6690          */
6691         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6692         if (ret < 0) {
6693                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6694                         key.objectid, key.type, key.offset, ret);
6695                 goto out;
6696         }
6697         if (ret > 0) {
6698                 fprintf(stderr, "Well that's odd, we just found this key "
6699                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6700                         key.offset);
6701                 ret = -EINVAL;
6702                 goto out;
6703         }
6704         leaf = path->nodes[0];
6705         fi = btrfs_item_ptr(leaf, path->slots[0],
6706                             struct btrfs_file_extent_item);
6707
6708         if (btrfs_file_extent_compression(leaf, fi) &&
6709             dback->disk_bytenr != entry->bytenr) {
6710                 fprintf(stderr, "Ref doesn't match the record start and is "
6711                         "compressed, please take a btrfs-image of this file "
6712                         "system and send it to a btrfs developer so they can "
6713                         "complete this functionality for bytenr %Lu\n",
6714                         dback->disk_bytenr);
6715                 ret = -EINVAL;
6716                 goto out;
6717         }
6718
6719         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6720                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6721         } else if (dback->disk_bytenr > entry->bytenr) {
6722                 u64 off_diff, offset;
6723
6724                 off_diff = dback->disk_bytenr - entry->bytenr;
6725                 offset = btrfs_file_extent_offset(leaf, fi);
6726                 if (dback->disk_bytenr + offset +
6727                     btrfs_file_extent_num_bytes(leaf, fi) >
6728                     entry->bytenr + entry->bytes) {
6729                         fprintf(stderr, "Ref is past the entry end, please "
6730                                 "take a btrfs-image of this file system and "
6731                                 "send it to a btrfs developer, ref %Lu\n",
6732                                 dback->disk_bytenr);
6733                         ret = -EINVAL;
6734                         goto out;
6735                 }
6736                 offset += off_diff;
6737                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6738                 btrfs_set_file_extent_offset(leaf, fi, offset);
6739         } else if (dback->disk_bytenr < entry->bytenr) {
6740                 u64 offset;
6741
6742                 offset = btrfs_file_extent_offset(leaf, fi);
6743                 if (dback->disk_bytenr + offset < entry->bytenr) {
6744                         fprintf(stderr, "Ref is before the entry start, please"
6745                                 " take a btrfs-image of this file system and "
6746                                 "send it to a btrfs developer, ref %Lu\n",
6747                                 dback->disk_bytenr);
6748                         ret = -EINVAL;
6749                         goto out;
6750                 }
6751
6752                 offset += dback->disk_bytenr;
6753                 offset -= entry->bytenr;
6754                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6755                 btrfs_set_file_extent_offset(leaf, fi, offset);
6756         }
6757
6758         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6759
6760         /*
6761          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6762          * only do this if we aren't using compression, otherwise it's a
6763          * trickier case.
6764          */
6765         if (!btrfs_file_extent_compression(leaf, fi))
6766                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6767         else
6768                 printf("ram bytes may be wrong?\n");
6769         btrfs_mark_buffer_dirty(leaf);
6770 out:
6771         err = btrfs_commit_transaction(trans, root);
6772         btrfs_release_path(path);
6773         return ret ? ret : err;
6774 }
6775
6776 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6777                            struct extent_record *rec)
6778 {
6779         struct extent_backref *back;
6780         struct data_backref *dback;
6781         struct extent_entry *entry, *best = NULL;
6782         LIST_HEAD(entries);
6783         int nr_entries = 0;
6784         int broken_entries = 0;
6785         int ret = 0;
6786         short mismatch = 0;
6787
6788         /*
6789          * Metadata is easy and the backrefs should always agree on bytenr and
6790          * size, if not we've got bigger issues.
6791          */
6792         if (rec->metadata)
6793                 return 0;
6794
6795         list_for_each_entry(back, &rec->backrefs, list) {
6796                 if (back->full_backref || !back->is_data)
6797                         continue;
6798
6799                 dback = (struct data_backref *)back;
6800
6801                 /*
6802                  * We only pay attention to backrefs that we found a real
6803                  * backref for.
6804                  */
6805                 if (dback->found_ref == 0)
6806                         continue;
6807
6808                 /*
6809                  * For now we only catch when the bytes don't match, not the
6810                  * bytenr.  We can easily do this at the same time, but I want
6811                  * to have a fs image to test on before we just add repair
6812                  * functionality willy-nilly so we know we won't screw up the
6813                  * repair.
6814                  */
6815
6816                 entry = find_entry(&entries, dback->disk_bytenr,
6817                                    dback->bytes);
6818                 if (!entry) {
6819                         entry = malloc(sizeof(struct extent_entry));
6820                         if (!entry) {
6821                                 ret = -ENOMEM;
6822                                 goto out;
6823                         }
6824                         memset(entry, 0, sizeof(*entry));
6825                         entry->bytenr = dback->disk_bytenr;
6826                         entry->bytes = dback->bytes;
6827                         list_add_tail(&entry->list, &entries);
6828                         nr_entries++;
6829                 }
6830
6831                 /*
6832                  * If we only have on entry we may think the entries agree when
6833                  * in reality they don't so we have to do some extra checking.
6834                  */
6835                 if (dback->disk_bytenr != rec->start ||
6836                     dback->bytes != rec->nr || back->broken)
6837                         mismatch = 1;
6838
6839                 if (back->broken) {
6840                         entry->broken++;
6841                         broken_entries++;
6842                 }
6843
6844                 entry->count++;
6845         }
6846
6847         /* Yay all the backrefs agree, carry on good sir */
6848         if (nr_entries <= 1 && !mismatch)
6849                 goto out;
6850
6851         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6852                 "%Lu\n", rec->start);
6853
6854         /*
6855          * First we want to see if the backrefs can agree amongst themselves who
6856          * is right, so figure out which one of the entries has the highest
6857          * count.
6858          */
6859         best = find_most_right_entry(&entries);
6860
6861         /*
6862          * Ok so we may have an even split between what the backrefs think, so
6863          * this is where we use the extent ref to see what it thinks.
6864          */
6865         if (!best) {
6866                 entry = find_entry(&entries, rec->start, rec->nr);
6867                 if (!entry && (!broken_entries || !rec->found_rec)) {
6868                         fprintf(stderr, "Backrefs don't agree with each other "
6869                                 "and extent record doesn't agree with anybody,"
6870                                 " so we can't fix bytenr %Lu bytes %Lu\n",
6871                                 rec->start, rec->nr);
6872                         ret = -EINVAL;
6873                         goto out;
6874                 } else if (!entry) {
6875                         /*
6876                          * Ok our backrefs were broken, we'll assume this is the
6877                          * correct value and add an entry for this range.
6878                          */
6879                         entry = malloc(sizeof(struct extent_entry));
6880                         if (!entry) {
6881                                 ret = -ENOMEM;
6882                                 goto out;
6883                         }
6884                         memset(entry, 0, sizeof(*entry));
6885                         entry->bytenr = rec->start;
6886                         entry->bytes = rec->nr;
6887                         list_add_tail(&entry->list, &entries);
6888                         nr_entries++;
6889                 }
6890                 entry->count++;
6891                 best = find_most_right_entry(&entries);
6892                 if (!best) {
6893                         fprintf(stderr, "Backrefs and extent record evenly "
6894                                 "split on who is right, this is going to "
6895                                 "require user input to fix bytenr %Lu bytes "
6896                                 "%Lu\n", rec->start, rec->nr);
6897                         ret = -EINVAL;
6898                         goto out;
6899                 }
6900         }
6901
6902         /*
6903          * I don't think this can happen currently as we'll abort() if we catch
6904          * this case higher up, but in case somebody removes that we still can't
6905          * deal with it properly here yet, so just bail out of that's the case.
6906          */
6907         if (best->bytenr != rec->start) {
6908                 fprintf(stderr, "Extent start and backref starts don't match, "
6909                         "please use btrfs-image on this file system and send "
6910                         "it to a btrfs developer so they can make fsck fix "
6911                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
6912                         rec->start, rec->nr);
6913                 ret = -EINVAL;
6914                 goto out;
6915         }
6916
6917         /*
6918          * Ok great we all agreed on an extent record, let's go find the real
6919          * references and fix up the ones that don't match.
6920          */
6921         list_for_each_entry(back, &rec->backrefs, list) {
6922                 if (back->full_backref || !back->is_data)
6923                         continue;
6924
6925                 dback = (struct data_backref *)back;
6926
6927                 /*
6928                  * Still ignoring backrefs that don't have a real ref attached
6929                  * to them.
6930                  */
6931                 if (dback->found_ref == 0)
6932                         continue;
6933
6934                 if (dback->bytes == best->bytes &&
6935                     dback->disk_bytenr == best->bytenr)
6936                         continue;
6937
6938                 ret = repair_ref(info, path, dback, best);
6939                 if (ret)
6940                         goto out;
6941         }
6942
6943         /*
6944          * Ok we messed with the actual refs, which means we need to drop our
6945          * entire cache and go back and rescan.  I know this is a huge pain and
6946          * adds a lot of extra work, but it's the only way to be safe.  Once all
6947          * the backrefs agree we may not need to do anything to the extent
6948          * record itself.
6949          */
6950         ret = -EAGAIN;
6951 out:
6952         while (!list_empty(&entries)) {
6953                 entry = list_entry(entries.next, struct extent_entry, list);
6954                 list_del_init(&entry->list);
6955                 free(entry);
6956         }
6957         return ret;
6958 }
6959
6960 static int process_duplicates(struct btrfs_root *root,
6961                               struct cache_tree *extent_cache,
6962                               struct extent_record *rec)
6963 {
6964         struct extent_record *good, *tmp;
6965         struct cache_extent *cache;
6966         int ret;
6967
6968         /*
6969          * If we found a extent record for this extent then return, or if we
6970          * have more than one duplicate we are likely going to need to delete
6971          * something.
6972          */
6973         if (rec->found_rec || rec->num_duplicates > 1)
6974                 return 0;
6975
6976         /* Shouldn't happen but just in case */
6977         BUG_ON(!rec->num_duplicates);
6978
6979         /*
6980          * So this happens if we end up with a backref that doesn't match the
6981          * actual extent entry.  So either the backref is bad or the extent
6982          * entry is bad.  Either way we want to have the extent_record actually
6983          * reflect what we found in the extent_tree, so we need to take the
6984          * duplicate out and use that as the extent_record since the only way we
6985          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6986          */
6987         remove_cache_extent(extent_cache, &rec->cache);
6988
6989         good = list_entry(rec->dups.next, struct extent_record, list);
6990         list_del_init(&good->list);
6991         INIT_LIST_HEAD(&good->backrefs);
6992         INIT_LIST_HEAD(&good->dups);
6993         good->cache.start = good->start;
6994         good->cache.size = good->nr;
6995         good->content_checked = 0;
6996         good->owner_ref_checked = 0;
6997         good->num_duplicates = 0;
6998         good->refs = rec->refs;
6999         list_splice_init(&rec->backrefs, &good->backrefs);
7000         while (1) {
7001                 cache = lookup_cache_extent(extent_cache, good->start,
7002                                             good->nr);
7003                 if (!cache)
7004                         break;
7005                 tmp = container_of(cache, struct extent_record, cache);
7006
7007                 /*
7008                  * If we find another overlapping extent and it's found_rec is
7009                  * set then it's a duplicate and we need to try and delete
7010                  * something.
7011                  */
7012                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7013                         if (list_empty(&good->list))
7014                                 list_add_tail(&good->list,
7015                                               &duplicate_extents);
7016                         good->num_duplicates += tmp->num_duplicates + 1;
7017                         list_splice_init(&tmp->dups, &good->dups);
7018                         list_del_init(&tmp->list);
7019                         list_add_tail(&tmp->list, &good->dups);
7020                         remove_cache_extent(extent_cache, &tmp->cache);
7021                         continue;
7022                 }
7023
7024                 /*
7025                  * Ok we have another non extent item backed extent rec, so lets
7026                  * just add it to this extent and carry on like we did above.
7027                  */
7028                 good->refs += tmp->refs;
7029                 list_splice_init(&tmp->backrefs, &good->backrefs);
7030                 remove_cache_extent(extent_cache, &tmp->cache);
7031                 free(tmp);
7032         }
7033         ret = insert_cache_extent(extent_cache, &good->cache);
7034         BUG_ON(ret);
7035         free(rec);
7036         return good->num_duplicates ? 0 : 1;
7037 }
7038
7039 static int delete_duplicate_records(struct btrfs_root *root,
7040                                     struct extent_record *rec)
7041 {
7042         struct btrfs_trans_handle *trans;
7043         LIST_HEAD(delete_list);
7044         struct btrfs_path *path;
7045         struct extent_record *tmp, *good, *n;
7046         int nr_del = 0;
7047         int ret = 0, err;
7048         struct btrfs_key key;
7049
7050         path = btrfs_alloc_path();
7051         if (!path) {
7052                 ret = -ENOMEM;
7053                 goto out;
7054         }
7055
7056         good = rec;
7057         /* Find the record that covers all of the duplicates. */
7058         list_for_each_entry(tmp, &rec->dups, list) {
7059                 if (good->start < tmp->start)
7060                         continue;
7061                 if (good->nr > tmp->nr)
7062                         continue;
7063
7064                 if (tmp->start + tmp->nr < good->start + good->nr) {
7065                         fprintf(stderr, "Ok we have overlapping extents that "
7066                                 "aren't completely covered by eachother, this "
7067                                 "is going to require more careful thought.  "
7068                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7069                                 tmp->start, tmp->nr, good->start, good->nr);
7070                         abort();
7071                 }
7072                 good = tmp;
7073         }
7074
7075         if (good != rec)
7076                 list_add_tail(&rec->list, &delete_list);
7077
7078         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7079                 if (tmp == good)
7080                         continue;
7081                 list_move_tail(&tmp->list, &delete_list);
7082         }
7083
7084         root = root->fs_info->extent_root;
7085         trans = btrfs_start_transaction(root, 1);
7086         if (IS_ERR(trans)) {
7087                 ret = PTR_ERR(trans);
7088                 goto out;
7089         }
7090
7091         list_for_each_entry(tmp, &delete_list, list) {
7092                 if (tmp->found_rec == 0)
7093                         continue;
7094                 key.objectid = tmp->start;
7095                 key.type = BTRFS_EXTENT_ITEM_KEY;
7096                 key.offset = tmp->nr;
7097
7098                 /* Shouldn't happen but just in case */
7099                 if (tmp->metadata) {
7100                         fprintf(stderr, "Well this shouldn't happen, extent "
7101                                 "record overlaps but is metadata? "
7102                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7103                         abort();
7104                 }
7105
7106                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7107                 if (ret) {
7108                         if (ret > 0)
7109                                 ret = -EINVAL;
7110                         break;
7111                 }
7112                 ret = btrfs_del_item(trans, root, path);
7113                 if (ret)
7114                         break;
7115                 btrfs_release_path(path);
7116                 nr_del++;
7117         }
7118         err = btrfs_commit_transaction(trans, root);
7119         if (err && !ret)
7120                 ret = err;
7121 out:
7122         while (!list_empty(&delete_list)) {
7123                 tmp = list_entry(delete_list.next, struct extent_record, list);
7124                 list_del_init(&tmp->list);
7125                 if (tmp == rec)
7126                         continue;
7127                 free(tmp);
7128         }
7129
7130         while (!list_empty(&rec->dups)) {
7131                 tmp = list_entry(rec->dups.next, struct extent_record, list);
7132                 list_del_init(&tmp->list);
7133                 free(tmp);
7134         }
7135
7136         btrfs_free_path(path);
7137
7138         if (!ret && !nr_del)
7139                 rec->num_duplicates = 0;
7140
7141         return ret ? ret : nr_del;
7142 }
7143
7144 static int find_possible_backrefs(struct btrfs_fs_info *info,
7145                                   struct btrfs_path *path,
7146                                   struct cache_tree *extent_cache,
7147                                   struct extent_record *rec)
7148 {
7149         struct btrfs_root *root;
7150         struct extent_backref *back;
7151         struct data_backref *dback;
7152         struct cache_extent *cache;
7153         struct btrfs_file_extent_item *fi;
7154         struct btrfs_key key;
7155         u64 bytenr, bytes;
7156         int ret;
7157
7158         list_for_each_entry(back, &rec->backrefs, list) {
7159                 /* Don't care about full backrefs (poor unloved backrefs) */
7160                 if (back->full_backref || !back->is_data)
7161                         continue;
7162
7163                 dback = (struct data_backref *)back;
7164
7165                 /* We found this one, we don't need to do a lookup */
7166                 if (dback->found_ref)
7167                         continue;
7168
7169                 key.objectid = dback->root;
7170                 key.type = BTRFS_ROOT_ITEM_KEY;
7171                 key.offset = (u64)-1;
7172
7173                 root = btrfs_read_fs_root(info, &key);
7174
7175                 /* No root, definitely a bad ref, skip */
7176                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7177                         continue;
7178                 /* Other err, exit */
7179                 if (IS_ERR(root))
7180                         return PTR_ERR(root);
7181
7182                 key.objectid = dback->owner;
7183                 key.type = BTRFS_EXTENT_DATA_KEY;
7184                 key.offset = dback->offset;
7185                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7186                 if (ret) {
7187                         btrfs_release_path(path);
7188                         if (ret < 0)
7189                                 return ret;
7190                         /* Didn't find it, we can carry on */
7191                         ret = 0;
7192                         continue;
7193                 }
7194
7195                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7196                                     struct btrfs_file_extent_item);
7197                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7198                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7199                 btrfs_release_path(path);
7200                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7201                 if (cache) {
7202                         struct extent_record *tmp;
7203                         tmp = container_of(cache, struct extent_record, cache);
7204
7205                         /*
7206                          * If we found an extent record for the bytenr for this
7207                          * particular backref then we can't add it to our
7208                          * current extent record.  We only want to add backrefs
7209                          * that don't have a corresponding extent item in the
7210                          * extent tree since they likely belong to this record
7211                          * and we need to fix it if it doesn't match bytenrs.
7212                          */
7213                         if  (tmp->found_rec)
7214                                 continue;
7215                 }
7216
7217                 dback->found_ref += 1;
7218                 dback->disk_bytenr = bytenr;
7219                 dback->bytes = bytes;
7220
7221                 /*
7222                  * Set this so the verify backref code knows not to trust the
7223                  * values in this backref.
7224                  */
7225                 back->broken = 1;
7226         }
7227
7228         return 0;
7229 }
7230
7231 /*
7232  * Record orphan data ref into corresponding root.
7233  *
7234  * Return 0 if the extent item contains data ref and recorded.
7235  * Return 1 if the extent item contains no useful data ref
7236  *   On that case, it may contains only shared_dataref or metadata backref
7237  *   or the file extent exists(this should be handled by the extent bytenr
7238  *   recovery routine)
7239  * Return <0 if something goes wrong.
7240  */
7241 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7242                                       struct extent_record *rec)
7243 {
7244         struct btrfs_key key;
7245         struct btrfs_root *dest_root;
7246         struct extent_backref *back;
7247         struct data_backref *dback;
7248         struct orphan_data_extent *orphan;
7249         struct btrfs_path *path;
7250         int recorded_data_ref = 0;
7251         int ret = 0;
7252
7253         if (rec->metadata)
7254                 return 1;
7255         path = btrfs_alloc_path();
7256         if (!path)
7257                 return -ENOMEM;
7258         list_for_each_entry(back, &rec->backrefs, list) {
7259                 if (back->full_backref || !back->is_data ||
7260                     !back->found_extent_tree)
7261                         continue;
7262                 dback = (struct data_backref *)back;
7263                 if (dback->found_ref)
7264                         continue;
7265                 key.objectid = dback->root;
7266                 key.type = BTRFS_ROOT_ITEM_KEY;
7267                 key.offset = (u64)-1;
7268
7269                 dest_root = btrfs_read_fs_root(fs_info, &key);
7270
7271                 /* For non-exist root we just skip it */
7272                 if (IS_ERR(dest_root) || !dest_root)
7273                         continue;
7274
7275                 key.objectid = dback->owner;
7276                 key.type = BTRFS_EXTENT_DATA_KEY;
7277                 key.offset = dback->offset;
7278
7279                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7280                 /*
7281                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7282                  * we need to record it for inode/file extent rebuild.
7283                  * For ret > 0, we record it only for file extent rebuild.
7284                  * For ret == 0, the file extent exists but only bytenr
7285                  * mismatch, let the original bytenr fix routine to handle,
7286                  * don't record it.
7287                  */
7288                 if (ret == 0)
7289                         continue;
7290                 ret = 0;
7291                 orphan = malloc(sizeof(*orphan));
7292                 if (!orphan) {
7293                         ret = -ENOMEM;
7294                         goto out;
7295                 }
7296                 INIT_LIST_HEAD(&orphan->list);
7297                 orphan->root = dback->root;
7298                 orphan->objectid = dback->owner;
7299                 orphan->offset = dback->offset;
7300                 orphan->disk_bytenr = rec->cache.start;
7301                 orphan->disk_len = rec->cache.size;
7302                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7303                 recorded_data_ref = 1;
7304         }
7305 out:
7306         btrfs_free_path(path);
7307         if (!ret)
7308                 return !recorded_data_ref;
7309         else
7310                 return ret;
7311 }
7312
7313 /*
7314  * when an incorrect extent item is found, this will delete
7315  * all of the existing entries for it and recreate them
7316  * based on what the tree scan found.
7317  */
7318 static int fixup_extent_refs(struct btrfs_fs_info *info,
7319                              struct cache_tree *extent_cache,
7320                              struct extent_record *rec)
7321 {
7322         struct btrfs_trans_handle *trans = NULL;
7323         int ret;
7324         struct btrfs_path *path;
7325         struct list_head *cur = rec->backrefs.next;
7326         struct cache_extent *cache;
7327         struct extent_backref *back;
7328         int allocated = 0;
7329         u64 flags = 0;
7330
7331         if (rec->flag_block_full_backref)
7332                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7333
7334         path = btrfs_alloc_path();
7335         if (!path)
7336                 return -ENOMEM;
7337
7338         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7339                 /*
7340                  * Sometimes the backrefs themselves are so broken they don't
7341                  * get attached to any meaningful rec, so first go back and
7342                  * check any of our backrefs that we couldn't find and throw
7343                  * them into the list if we find the backref so that
7344                  * verify_backrefs can figure out what to do.
7345                  */
7346                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7347                 if (ret < 0)
7348                         goto out;
7349         }
7350
7351         /* step one, make sure all of the backrefs agree */
7352         ret = verify_backrefs(info, path, rec);
7353         if (ret < 0)
7354                 goto out;
7355
7356         trans = btrfs_start_transaction(info->extent_root, 1);
7357         if (IS_ERR(trans)) {
7358                 ret = PTR_ERR(trans);
7359                 goto out;
7360         }
7361
7362         /* step two, delete all the existing records */
7363         ret = delete_extent_records(trans, info->extent_root, path,
7364                                     rec->start, rec->max_size);
7365
7366         if (ret < 0)
7367                 goto out;
7368
7369         /* was this block corrupt?  If so, don't add references to it */
7370         cache = lookup_cache_extent(info->corrupt_blocks,
7371                                     rec->start, rec->max_size);
7372         if (cache) {
7373                 ret = 0;
7374                 goto out;
7375         }
7376
7377         /* step three, recreate all the refs we did find */
7378         while(cur != &rec->backrefs) {
7379                 back = list_entry(cur, struct extent_backref, list);
7380                 cur = cur->next;
7381
7382                 /*
7383                  * if we didn't find any references, don't create a
7384                  * new extent record
7385                  */
7386                 if (!back->found_ref)
7387                         continue;
7388
7389                 rec->bad_full_backref = 0;
7390                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7391                 allocated = 1;
7392
7393                 if (ret)
7394                         goto out;
7395         }
7396 out:
7397         if (trans) {
7398                 int err = btrfs_commit_transaction(trans, info->extent_root);
7399                 if (!ret)
7400                         ret = err;
7401         }
7402
7403         btrfs_free_path(path);
7404         return ret;
7405 }
7406
7407 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7408                               struct extent_record *rec)
7409 {
7410         struct btrfs_trans_handle *trans;
7411         struct btrfs_root *root = fs_info->extent_root;
7412         struct btrfs_path *path;
7413         struct btrfs_extent_item *ei;
7414         struct btrfs_key key;
7415         u64 flags;
7416         int ret = 0;
7417
7418         key.objectid = rec->start;
7419         if (rec->metadata) {
7420                 key.type = BTRFS_METADATA_ITEM_KEY;
7421                 key.offset = rec->info_level;
7422         } else {
7423                 key.type = BTRFS_EXTENT_ITEM_KEY;
7424                 key.offset = rec->max_size;
7425         }
7426
7427         path = btrfs_alloc_path();
7428         if (!path)
7429                 return -ENOMEM;
7430
7431         trans = btrfs_start_transaction(root, 0);
7432         if (IS_ERR(trans)) {
7433                 btrfs_free_path(path);
7434                 return PTR_ERR(trans);
7435         }
7436
7437         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7438         if (ret < 0) {
7439                 btrfs_free_path(path);
7440                 btrfs_commit_transaction(trans, root);
7441                 return ret;
7442         } else if (ret) {
7443                 fprintf(stderr, "Didn't find extent for %llu\n",
7444                         (unsigned long long)rec->start);
7445                 btrfs_free_path(path);
7446                 btrfs_commit_transaction(trans, root);
7447                 return -ENOENT;
7448         }
7449
7450         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7451                             struct btrfs_extent_item);
7452         flags = btrfs_extent_flags(path->nodes[0], ei);
7453         if (rec->flag_block_full_backref) {
7454                 fprintf(stderr, "setting full backref on %llu\n",
7455                         (unsigned long long)key.objectid);
7456                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7457         } else {
7458                 fprintf(stderr, "clearing full backref on %llu\n",
7459                         (unsigned long long)key.objectid);
7460                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7461         }
7462         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7463         btrfs_mark_buffer_dirty(path->nodes[0]);
7464         btrfs_free_path(path);
7465         return btrfs_commit_transaction(trans, root);
7466 }
7467
7468 /* right now we only prune from the extent allocation tree */
7469 static int prune_one_block(struct btrfs_trans_handle *trans,
7470                            struct btrfs_fs_info *info,
7471                            struct btrfs_corrupt_block *corrupt)
7472 {
7473         int ret;
7474         struct btrfs_path path;
7475         struct extent_buffer *eb;
7476         u64 found;
7477         int slot;
7478         int nritems;
7479         int level = corrupt->level + 1;
7480
7481         btrfs_init_path(&path);
7482 again:
7483         /* we want to stop at the parent to our busted block */
7484         path.lowest_level = level;
7485
7486         ret = btrfs_search_slot(trans, info->extent_root,
7487                                 &corrupt->key, &path, -1, 1);
7488
7489         if (ret < 0)
7490                 goto out;
7491
7492         eb = path.nodes[level];
7493         if (!eb) {
7494                 ret = -ENOENT;
7495                 goto out;
7496         }
7497
7498         /*
7499          * hopefully the search gave us the block we want to prune,
7500          * lets try that first
7501          */
7502         slot = path.slots[level];
7503         found =  btrfs_node_blockptr(eb, slot);
7504         if (found == corrupt->cache.start)
7505                 goto del_ptr;
7506
7507         nritems = btrfs_header_nritems(eb);
7508
7509         /* the search failed, lets scan this node and hope we find it */
7510         for (slot = 0; slot < nritems; slot++) {
7511                 found =  btrfs_node_blockptr(eb, slot);
7512                 if (found == corrupt->cache.start)
7513                         goto del_ptr;
7514         }
7515         /*
7516          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7517          * to this block
7518          */
7519         if (eb == info->extent_root->node) {
7520                 ret = -ENOENT;
7521                 goto out;
7522         } else {
7523                 level++;
7524                 btrfs_release_path(&path);
7525                 goto again;
7526         }
7527
7528 del_ptr:
7529         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7530         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7531
7532 out:
7533         btrfs_release_path(&path);
7534         return ret;
7535 }
7536
7537 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7538 {
7539         struct btrfs_trans_handle *trans = NULL;
7540         struct cache_extent *cache;
7541         struct btrfs_corrupt_block *corrupt;
7542
7543         while (1) {
7544                 cache = search_cache_extent(info->corrupt_blocks, 0);
7545                 if (!cache)
7546                         break;
7547                 if (!trans) {
7548                         trans = btrfs_start_transaction(info->extent_root, 1);
7549                         if (IS_ERR(trans))
7550                                 return PTR_ERR(trans);
7551                 }
7552                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7553                 prune_one_block(trans, info, corrupt);
7554                 remove_cache_extent(info->corrupt_blocks, cache);
7555         }
7556         if (trans)
7557                 return btrfs_commit_transaction(trans, info->extent_root);
7558         return 0;
7559 }
7560
7561 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7562 {
7563         struct btrfs_block_group_cache *cache;
7564         u64 start, end;
7565         int ret;
7566
7567         while (1) {
7568                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7569                                             &start, &end, EXTENT_DIRTY);
7570                 if (ret)
7571                         break;
7572                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7573                                    GFP_NOFS);
7574         }
7575
7576         start = 0;
7577         while (1) {
7578                 cache = btrfs_lookup_first_block_group(fs_info, start);
7579                 if (!cache)
7580                         break;
7581                 if (cache->cached)
7582                         cache->cached = 0;
7583                 start = cache->key.objectid + cache->key.offset;
7584         }
7585 }
7586
7587 static int check_extent_refs(struct btrfs_root *root,
7588                              struct cache_tree *extent_cache)
7589 {
7590         struct extent_record *rec;
7591         struct cache_extent *cache;
7592         int err = 0;
7593         int ret = 0;
7594         int fixed = 0;
7595         int had_dups = 0;
7596         int recorded = 0;
7597
7598         if (repair) {
7599                 /*
7600                  * if we're doing a repair, we have to make sure
7601                  * we don't allocate from the problem extents.
7602                  * In the worst case, this will be all the
7603                  * extents in the FS
7604                  */
7605                 cache = search_cache_extent(extent_cache, 0);
7606                 while(cache) {
7607                         rec = container_of(cache, struct extent_record, cache);
7608                         set_extent_dirty(root->fs_info->excluded_extents,
7609                                          rec->start,
7610                                          rec->start + rec->max_size - 1,
7611                                          GFP_NOFS);
7612                         cache = next_cache_extent(cache);
7613                 }
7614
7615                 /* pin down all the corrupted blocks too */
7616                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7617                 while(cache) {
7618                         set_extent_dirty(root->fs_info->excluded_extents,
7619                                          cache->start,
7620                                          cache->start + cache->size - 1,
7621                                          GFP_NOFS);
7622                         cache = next_cache_extent(cache);
7623                 }
7624                 prune_corrupt_blocks(root->fs_info);
7625                 reset_cached_block_groups(root->fs_info);
7626         }
7627
7628         reset_cached_block_groups(root->fs_info);
7629
7630         /*
7631          * We need to delete any duplicate entries we find first otherwise we
7632          * could mess up the extent tree when we have backrefs that actually
7633          * belong to a different extent item and not the weird duplicate one.
7634          */
7635         while (repair && !list_empty(&duplicate_extents)) {
7636                 rec = list_entry(duplicate_extents.next, struct extent_record,
7637                                  list);
7638                 list_del_init(&rec->list);
7639
7640                 /* Sometimes we can find a backref before we find an actual
7641                  * extent, so we need to process it a little bit to see if there
7642                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7643                  * if this is a backref screwup.  If we need to delete stuff
7644                  * process_duplicates() will return 0, otherwise it will return
7645                  * 1 and we
7646                  */
7647                 if (process_duplicates(root, extent_cache, rec))
7648                         continue;
7649                 ret = delete_duplicate_records(root, rec);
7650                 if (ret < 0)
7651                         return ret;
7652                 /*
7653                  * delete_duplicate_records will return the number of entries
7654                  * deleted, so if it's greater than 0 then we know we actually
7655                  * did something and we need to remove.
7656                  */
7657                 if (ret)
7658                         had_dups = 1;
7659         }
7660
7661         if (had_dups)
7662                 return -EAGAIN;
7663
7664         while(1) {
7665                 int cur_err = 0;
7666
7667                 fixed = 0;
7668                 recorded = 0;
7669                 cache = search_cache_extent(extent_cache, 0);
7670                 if (!cache)
7671                         break;
7672                 rec = container_of(cache, struct extent_record, cache);
7673                 if (rec->num_duplicates) {
7674                         fprintf(stderr, "extent item %llu has multiple extent "
7675                                 "items\n", (unsigned long long)rec->start);
7676                         err = 1;
7677                         cur_err = 1;
7678                 }
7679
7680                 if (rec->refs != rec->extent_item_refs) {
7681                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7682                                 (unsigned long long)rec->start,
7683                                 (unsigned long long)rec->nr);
7684                         fprintf(stderr, "extent item %llu, found %llu\n",
7685                                 (unsigned long long)rec->extent_item_refs,
7686                                 (unsigned long long)rec->refs);
7687                         ret = record_orphan_data_extents(root->fs_info, rec);
7688                         if (ret < 0)
7689                                 goto repair_abort;
7690                         if (ret == 0) {
7691                                 recorded = 1;
7692                         } else {
7693                                 /*
7694                                  * we can't use the extent to repair file
7695                                  * extent, let the fallback method handle it.
7696                                  */
7697                                 if (!fixed && repair) {
7698                                         ret = fixup_extent_refs(
7699                                                         root->fs_info,
7700                                                         extent_cache, rec);
7701                                         if (ret)
7702                                                 goto repair_abort;
7703                                         fixed = 1;
7704                                 }
7705                         }
7706                         err = 1;
7707                         cur_err = 1;
7708                 }
7709                 if (all_backpointers_checked(rec, 1)) {
7710                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7711                                 (unsigned long long)rec->start,
7712                                 (unsigned long long)rec->nr);
7713
7714                         if (!fixed && !recorded && repair) {
7715                                 ret = fixup_extent_refs(root->fs_info,
7716                                                         extent_cache, rec);
7717                                 if (ret)
7718                                         goto repair_abort;
7719                                 fixed = 1;
7720                         }
7721                         cur_err = 1;
7722                         err = 1;
7723                 }
7724                 if (!rec->owner_ref_checked) {
7725                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7726                                 (unsigned long long)rec->start,
7727                                 (unsigned long long)rec->nr);
7728                         if (!fixed && !recorded && repair) {
7729                                 ret = fixup_extent_refs(root->fs_info,
7730                                                         extent_cache, rec);
7731                                 if (ret)
7732                                         goto repair_abort;
7733                                 fixed = 1;
7734                         }
7735                         err = 1;
7736                         cur_err = 1;
7737                 }
7738                 if (rec->bad_full_backref) {
7739                         fprintf(stderr, "bad full backref, on [%llu]\n",
7740                                 (unsigned long long)rec->start);
7741                         if (repair) {
7742                                 ret = fixup_extent_flags(root->fs_info, rec);
7743                                 if (ret)
7744                                         goto repair_abort;
7745                                 fixed = 1;
7746                         }
7747                         err = 1;
7748                         cur_err = 1;
7749                 }
7750                 /*
7751                  * Although it's not a extent ref's problem, we reuse this
7752                  * routine for error reporting.
7753                  * No repair function yet.
7754                  */
7755                 if (rec->crossing_stripes) {
7756                         fprintf(stderr,
7757                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7758                                 rec->start, rec->start + rec->max_size);
7759                         err = 1;
7760                         cur_err = 1;
7761                 }
7762
7763                 if (rec->wrong_chunk_type) {
7764                         fprintf(stderr,
7765                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7766                                 rec->start, rec->start + rec->max_size);
7767                         err = 1;
7768                         cur_err = 1;
7769                 }
7770
7771                 remove_cache_extent(extent_cache, cache);
7772                 free_all_extent_backrefs(rec);
7773                 if (!init_extent_tree && repair && (!cur_err || fixed))
7774                         clear_extent_dirty(root->fs_info->excluded_extents,
7775                                            rec->start,
7776                                            rec->start + rec->max_size - 1,
7777                                            GFP_NOFS);
7778                 free(rec);
7779         }
7780 repair_abort:
7781         if (repair) {
7782                 if (ret && ret != -EAGAIN) {
7783                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7784                         exit(1);
7785                 } else if (!ret) {
7786                         struct btrfs_trans_handle *trans;
7787
7788                         root = root->fs_info->extent_root;
7789                         trans = btrfs_start_transaction(root, 1);
7790                         if (IS_ERR(trans)) {
7791                                 ret = PTR_ERR(trans);
7792                                 goto repair_abort;
7793                         }
7794
7795                         btrfs_fix_block_accounting(trans, root);
7796                         ret = btrfs_commit_transaction(trans, root);
7797                         if (ret)
7798                                 goto repair_abort;
7799                 }
7800                 if (err)
7801                         fprintf(stderr, "repaired damaged extent references\n");
7802                 return ret;
7803         }
7804         return err;
7805 }
7806
7807 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7808 {
7809         u64 stripe_size;
7810
7811         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7812                 stripe_size = length;
7813                 stripe_size /= num_stripes;
7814         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7815                 stripe_size = length * 2;
7816                 stripe_size /= num_stripes;
7817         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7818                 stripe_size = length;
7819                 stripe_size /= (num_stripes - 1);
7820         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7821                 stripe_size = length;
7822                 stripe_size /= (num_stripes - 2);
7823         } else {
7824                 stripe_size = length;
7825         }
7826         return stripe_size;
7827 }
7828
7829 /*
7830  * Check the chunk with its block group/dev list ref:
7831  * Return 0 if all refs seems valid.
7832  * Return 1 if part of refs seems valid, need later check for rebuild ref
7833  * like missing block group and needs to search extent tree to rebuild them.
7834  * Return -1 if essential refs are missing and unable to rebuild.
7835  */
7836 static int check_chunk_refs(struct chunk_record *chunk_rec,
7837                             struct block_group_tree *block_group_cache,
7838                             struct device_extent_tree *dev_extent_cache,
7839                             int silent)
7840 {
7841         struct cache_extent *block_group_item;
7842         struct block_group_record *block_group_rec;
7843         struct cache_extent *dev_extent_item;
7844         struct device_extent_record *dev_extent_rec;
7845         u64 devid;
7846         u64 offset;
7847         u64 length;
7848         int metadump_v2 = 0;
7849         int i;
7850         int ret = 0;
7851
7852         block_group_item = lookup_cache_extent(&block_group_cache->tree,
7853                                                chunk_rec->offset,
7854                                                chunk_rec->length);
7855         if (block_group_item) {
7856                 block_group_rec = container_of(block_group_item,
7857                                                struct block_group_record,
7858                                                cache);
7859                 if (chunk_rec->length != block_group_rec->offset ||
7860                     chunk_rec->offset != block_group_rec->objectid ||
7861                     (!metadump_v2 &&
7862                      chunk_rec->type_flags != block_group_rec->flags)) {
7863                         if (!silent)
7864                                 fprintf(stderr,
7865                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7866                                         chunk_rec->objectid,
7867                                         chunk_rec->type,
7868                                         chunk_rec->offset,
7869                                         chunk_rec->length,
7870                                         chunk_rec->offset,
7871                                         chunk_rec->type_flags,
7872                                         block_group_rec->objectid,
7873                                         block_group_rec->type,
7874                                         block_group_rec->offset,
7875                                         block_group_rec->offset,
7876                                         block_group_rec->objectid,
7877                                         block_group_rec->flags);
7878                         ret = -1;
7879                 } else {
7880                         list_del_init(&block_group_rec->list);
7881                         chunk_rec->bg_rec = block_group_rec;
7882                 }
7883         } else {
7884                 if (!silent)
7885                         fprintf(stderr,
7886                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7887                                 chunk_rec->objectid,
7888                                 chunk_rec->type,
7889                                 chunk_rec->offset,
7890                                 chunk_rec->length,
7891                                 chunk_rec->offset,
7892                                 chunk_rec->type_flags);
7893                 ret = 1;
7894         }
7895
7896         if (metadump_v2)
7897                 return ret;
7898
7899         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7900                                     chunk_rec->num_stripes);
7901         for (i = 0; i < chunk_rec->num_stripes; ++i) {
7902                 devid = chunk_rec->stripes[i].devid;
7903                 offset = chunk_rec->stripes[i].offset;
7904                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7905                                                        devid, offset, length);
7906                 if (dev_extent_item) {
7907                         dev_extent_rec = container_of(dev_extent_item,
7908                                                 struct device_extent_record,
7909                                                 cache);
7910                         if (dev_extent_rec->objectid != devid ||
7911                             dev_extent_rec->offset != offset ||
7912                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
7913                             dev_extent_rec->length != length) {
7914                                 if (!silent)
7915                                         fprintf(stderr,
7916                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7917                                                 chunk_rec->objectid,
7918                                                 chunk_rec->type,
7919                                                 chunk_rec->offset,
7920                                                 chunk_rec->stripes[i].devid,
7921                                                 chunk_rec->stripes[i].offset,
7922                                                 dev_extent_rec->objectid,
7923                                                 dev_extent_rec->offset,
7924                                                 dev_extent_rec->length);
7925                                 ret = -1;
7926                         } else {
7927                                 list_move(&dev_extent_rec->chunk_list,
7928                                           &chunk_rec->dextents);
7929                         }
7930                 } else {
7931                         if (!silent)
7932                                 fprintf(stderr,
7933                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7934                                         chunk_rec->objectid,
7935                                         chunk_rec->type,
7936                                         chunk_rec->offset,
7937                                         chunk_rec->stripes[i].devid,
7938                                         chunk_rec->stripes[i].offset);
7939                         ret = -1;
7940                 }
7941         }
7942         return ret;
7943 }
7944
7945 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7946 int check_chunks(struct cache_tree *chunk_cache,
7947                  struct block_group_tree *block_group_cache,
7948                  struct device_extent_tree *dev_extent_cache,
7949                  struct list_head *good, struct list_head *bad,
7950                  struct list_head *rebuild, int silent)
7951 {
7952         struct cache_extent *chunk_item;
7953         struct chunk_record *chunk_rec;
7954         struct block_group_record *bg_rec;
7955         struct device_extent_record *dext_rec;
7956         int err;
7957         int ret = 0;
7958
7959         chunk_item = first_cache_extent(chunk_cache);
7960         while (chunk_item) {
7961                 chunk_rec = container_of(chunk_item, struct chunk_record,
7962                                          cache);
7963                 err = check_chunk_refs(chunk_rec, block_group_cache,
7964                                        dev_extent_cache, silent);
7965                 if (err < 0)
7966                         ret = err;
7967                 if (err == 0 && good)
7968                         list_add_tail(&chunk_rec->list, good);
7969                 if (err > 0 && rebuild)
7970                         list_add_tail(&chunk_rec->list, rebuild);
7971                 if (err < 0 && bad)
7972                         list_add_tail(&chunk_rec->list, bad);
7973                 chunk_item = next_cache_extent(chunk_item);
7974         }
7975
7976         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7977                 if (!silent)
7978                         fprintf(stderr,
7979                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7980                                 bg_rec->objectid,
7981                                 bg_rec->offset,
7982                                 bg_rec->flags);
7983                 if (!ret)
7984                         ret = 1;
7985         }
7986
7987         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7988                             chunk_list) {
7989                 if (!silent)
7990                         fprintf(stderr,
7991                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7992                                 dext_rec->objectid,
7993                                 dext_rec->offset,
7994                                 dext_rec->length);
7995                 if (!ret)
7996                         ret = 1;
7997         }
7998         return ret;
7999 }
8000
8001
8002 static int check_device_used(struct device_record *dev_rec,
8003                              struct device_extent_tree *dext_cache)
8004 {
8005         struct cache_extent *cache;
8006         struct device_extent_record *dev_extent_rec;
8007         u64 total_byte = 0;
8008
8009         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8010         while (cache) {
8011                 dev_extent_rec = container_of(cache,
8012                                               struct device_extent_record,
8013                                               cache);
8014                 if (dev_extent_rec->objectid != dev_rec->devid)
8015                         break;
8016
8017                 list_del_init(&dev_extent_rec->device_list);
8018                 total_byte += dev_extent_rec->length;
8019                 cache = next_cache_extent(cache);
8020         }
8021
8022         if (total_byte != dev_rec->byte_used) {
8023                 fprintf(stderr,
8024                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8025                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8026                         dev_rec->type, dev_rec->offset);
8027                 return -1;
8028         } else {
8029                 return 0;
8030         }
8031 }
8032
8033 /* check btrfs_dev_item -> btrfs_dev_extent */
8034 static int check_devices(struct rb_root *dev_cache,
8035                          struct device_extent_tree *dev_extent_cache)
8036 {
8037         struct rb_node *dev_node;
8038         struct device_record *dev_rec;
8039         struct device_extent_record *dext_rec;
8040         int err;
8041         int ret = 0;
8042
8043         dev_node = rb_first(dev_cache);
8044         while (dev_node) {
8045                 dev_rec = container_of(dev_node, struct device_record, node);
8046                 err = check_device_used(dev_rec, dev_extent_cache);
8047                 if (err)
8048                         ret = err;
8049
8050                 dev_node = rb_next(dev_node);
8051         }
8052         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8053                             device_list) {
8054                 fprintf(stderr,
8055                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8056                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8057                 if (!ret)
8058                         ret = 1;
8059         }
8060         return ret;
8061 }
8062
8063 static int add_root_item_to_list(struct list_head *head,
8064                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8065                                   u8 level, u8 drop_level,
8066                                   int level_size, struct btrfs_key *drop_key)
8067 {
8068
8069         struct root_item_record *ri_rec;
8070         ri_rec = malloc(sizeof(*ri_rec));
8071         if (!ri_rec)
8072                 return -ENOMEM;
8073         ri_rec->bytenr = bytenr;
8074         ri_rec->objectid = objectid;
8075         ri_rec->level = level;
8076         ri_rec->level_size = level_size;
8077         ri_rec->drop_level = drop_level;
8078         ri_rec->last_snapshot = last_snapshot;
8079         if (drop_key)
8080                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8081         list_add_tail(&ri_rec->list, head);
8082
8083         return 0;
8084 }
8085
8086 static void free_root_item_list(struct list_head *list)
8087 {
8088         struct root_item_record *ri_rec;
8089
8090         while (!list_empty(list)) {
8091                 ri_rec = list_first_entry(list, struct root_item_record,
8092                                           list);
8093                 list_del_init(&ri_rec->list);
8094                 free(ri_rec);
8095         }
8096 }
8097
8098 static int deal_root_from_list(struct list_head *list,
8099                                struct btrfs_root *root,
8100                                struct block_info *bits,
8101                                int bits_nr,
8102                                struct cache_tree *pending,
8103                                struct cache_tree *seen,
8104                                struct cache_tree *reada,
8105                                struct cache_tree *nodes,
8106                                struct cache_tree *extent_cache,
8107                                struct cache_tree *chunk_cache,
8108                                struct rb_root *dev_cache,
8109                                struct block_group_tree *block_group_cache,
8110                                struct device_extent_tree *dev_extent_cache)
8111 {
8112         int ret = 0;
8113         u64 last;
8114
8115         while (!list_empty(list)) {
8116                 struct root_item_record *rec;
8117                 struct extent_buffer *buf;
8118                 rec = list_entry(list->next,
8119                                  struct root_item_record, list);
8120                 last = 0;
8121                 buf = read_tree_block(root->fs_info->tree_root,
8122                                       rec->bytenr, rec->level_size, 0);
8123                 if (!extent_buffer_uptodate(buf)) {
8124                         free_extent_buffer(buf);
8125                         ret = -EIO;
8126                         break;
8127                 }
8128                 add_root_to_pending(buf, extent_cache, pending,
8129                                     seen, nodes, rec->objectid);
8130                 /*
8131                  * To rebuild extent tree, we need deal with snapshot
8132                  * one by one, otherwise we deal with node firstly which
8133                  * can maximize readahead.
8134                  */
8135                 while (1) {
8136                         ret = run_next_block(root, bits, bits_nr, &last,
8137                                              pending, seen, reada, nodes,
8138                                              extent_cache, chunk_cache,
8139                                              dev_cache, block_group_cache,
8140                                              dev_extent_cache, rec);
8141                         if (ret != 0)
8142                                 break;
8143                 }
8144                 free_extent_buffer(buf);
8145                 list_del(&rec->list);
8146                 free(rec);
8147                 if (ret < 0)
8148                         break;
8149         }
8150         while (ret >= 0) {
8151                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8152                                      reada, nodes, extent_cache, chunk_cache,
8153                                      dev_cache, block_group_cache,
8154                                      dev_extent_cache, NULL);
8155                 if (ret != 0) {
8156                         if (ret > 0)
8157                                 ret = 0;
8158                         break;
8159                 }
8160         }
8161         return ret;
8162 }
8163
8164 static int check_chunks_and_extents(struct btrfs_root *root)
8165 {
8166         struct rb_root dev_cache;
8167         struct cache_tree chunk_cache;
8168         struct block_group_tree block_group_cache;
8169         struct device_extent_tree dev_extent_cache;
8170         struct cache_tree extent_cache;
8171         struct cache_tree seen;
8172         struct cache_tree pending;
8173         struct cache_tree reada;
8174         struct cache_tree nodes;
8175         struct extent_io_tree excluded_extents;
8176         struct cache_tree corrupt_blocks;
8177         struct btrfs_path path;
8178         struct btrfs_key key;
8179         struct btrfs_key found_key;
8180         int ret, err = 0;
8181         struct block_info *bits;
8182         int bits_nr;
8183         struct extent_buffer *leaf;
8184         int slot;
8185         struct btrfs_root_item ri;
8186         struct list_head dropping_trees;
8187         struct list_head normal_trees;
8188         struct btrfs_root *root1;
8189         u64 objectid;
8190         u32 level_size;
8191         u8 level;
8192
8193         dev_cache = RB_ROOT;
8194         cache_tree_init(&chunk_cache);
8195         block_group_tree_init(&block_group_cache);
8196         device_extent_tree_init(&dev_extent_cache);
8197
8198         cache_tree_init(&extent_cache);
8199         cache_tree_init(&seen);
8200         cache_tree_init(&pending);
8201         cache_tree_init(&nodes);
8202         cache_tree_init(&reada);
8203         cache_tree_init(&corrupt_blocks);
8204         extent_io_tree_init(&excluded_extents);
8205         INIT_LIST_HEAD(&dropping_trees);
8206         INIT_LIST_HEAD(&normal_trees);
8207
8208         if (repair) {
8209                 root->fs_info->excluded_extents = &excluded_extents;
8210                 root->fs_info->fsck_extent_cache = &extent_cache;
8211                 root->fs_info->free_extent_hook = free_extent_hook;
8212                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8213         }
8214
8215         bits_nr = 1024;
8216         bits = malloc(bits_nr * sizeof(struct block_info));
8217         if (!bits) {
8218                 perror("malloc");
8219                 exit(1);
8220         }
8221
8222         if (ctx.progress_enabled) {
8223                 ctx.tp = TASK_EXTENTS;
8224                 task_start(ctx.info);
8225         }
8226
8227 again:
8228         root1 = root->fs_info->tree_root;
8229         level = btrfs_header_level(root1->node);
8230         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8231                                     root1->node->start, 0, level, 0,
8232                                     root1->nodesize, NULL);
8233         if (ret < 0)
8234                 goto out;
8235         root1 = root->fs_info->chunk_root;
8236         level = btrfs_header_level(root1->node);
8237         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8238                                     root1->node->start, 0, level, 0,
8239                                     root1->nodesize, NULL);
8240         if (ret < 0)
8241                 goto out;
8242         btrfs_init_path(&path);
8243         key.offset = 0;
8244         key.objectid = 0;
8245         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8246         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8247                                         &key, &path, 0, 0);
8248         if (ret < 0)
8249                 goto out;
8250         while(1) {
8251                 leaf = path.nodes[0];
8252                 slot = path.slots[0];
8253                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8254                         ret = btrfs_next_leaf(root, &path);
8255                         if (ret != 0)
8256                                 break;
8257                         leaf = path.nodes[0];
8258                         slot = path.slots[0];
8259                 }
8260                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8261                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8262                         unsigned long offset;
8263                         u64 last_snapshot;
8264
8265                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8266                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8267                         last_snapshot = btrfs_root_last_snapshot(&ri);
8268                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8269                                 level = btrfs_root_level(&ri);
8270                                 level_size = root->nodesize;
8271                                 ret = add_root_item_to_list(&normal_trees,
8272                                                 found_key.objectid,
8273                                                 btrfs_root_bytenr(&ri),
8274                                                 last_snapshot, level,
8275                                                 0, level_size, NULL);
8276                                 if (ret < 0)
8277                                         goto out;
8278                         } else {
8279                                 level = btrfs_root_level(&ri);
8280                                 level_size = root->nodesize;
8281                                 objectid = found_key.objectid;
8282                                 btrfs_disk_key_to_cpu(&found_key,
8283                                                       &ri.drop_progress);
8284                                 ret = add_root_item_to_list(&dropping_trees,
8285                                                 objectid,
8286                                                 btrfs_root_bytenr(&ri),
8287                                                 last_snapshot, level,
8288                                                 ri.drop_level,
8289                                                 level_size, &found_key);
8290                                 if (ret < 0)
8291                                         goto out;
8292                         }
8293                 }
8294                 path.slots[0]++;
8295         }
8296         btrfs_release_path(&path);
8297
8298         /*
8299          * check_block can return -EAGAIN if it fixes something, please keep
8300          * this in mind when dealing with return values from these functions, if
8301          * we get -EAGAIN we want to fall through and restart the loop.
8302          */
8303         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8304                                   &seen, &reada, &nodes, &extent_cache,
8305                                   &chunk_cache, &dev_cache, &block_group_cache,
8306                                   &dev_extent_cache);
8307         if (ret < 0) {
8308                 if (ret == -EAGAIN)
8309                         goto loop;
8310                 goto out;
8311         }
8312         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8313                                   &pending, &seen, &reada, &nodes,
8314                                   &extent_cache, &chunk_cache, &dev_cache,
8315                                   &block_group_cache, &dev_extent_cache);
8316         if (ret < 0) {
8317                 if (ret == -EAGAIN)
8318                         goto loop;
8319                 goto out;
8320         }
8321
8322         ret = check_chunks(&chunk_cache, &block_group_cache,
8323                            &dev_extent_cache, NULL, NULL, NULL, 0);
8324         if (ret) {
8325                 if (ret == -EAGAIN)
8326                         goto loop;
8327                 err = ret;
8328         }
8329
8330         ret = check_extent_refs(root, &extent_cache);
8331         if (ret < 0) {
8332                 if (ret == -EAGAIN)
8333                         goto loop;
8334                 goto out;
8335         }
8336
8337         ret = check_devices(&dev_cache, &dev_extent_cache);
8338         if (ret && err)
8339                 ret = err;
8340
8341 out:
8342         task_stop(ctx.info);
8343         if (repair) {
8344                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8345                 extent_io_tree_cleanup(&excluded_extents);
8346                 root->fs_info->fsck_extent_cache = NULL;
8347                 root->fs_info->free_extent_hook = NULL;
8348                 root->fs_info->corrupt_blocks = NULL;
8349                 root->fs_info->excluded_extents = NULL;
8350         }
8351         free(bits);
8352         free_chunk_cache_tree(&chunk_cache);
8353         free_device_cache_tree(&dev_cache);
8354         free_block_group_tree(&block_group_cache);
8355         free_device_extent_tree(&dev_extent_cache);
8356         free_extent_cache_tree(&seen);
8357         free_extent_cache_tree(&pending);
8358         free_extent_cache_tree(&reada);
8359         free_extent_cache_tree(&nodes);
8360         return ret;
8361 loop:
8362         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8363         free_extent_cache_tree(&seen);
8364         free_extent_cache_tree(&pending);
8365         free_extent_cache_tree(&reada);
8366         free_extent_cache_tree(&nodes);
8367         free_chunk_cache_tree(&chunk_cache);
8368         free_block_group_tree(&block_group_cache);
8369         free_device_cache_tree(&dev_cache);
8370         free_device_extent_tree(&dev_extent_cache);
8371         free_extent_record_cache(root->fs_info, &extent_cache);
8372         free_root_item_list(&normal_trees);
8373         free_root_item_list(&dropping_trees);
8374         extent_io_tree_cleanup(&excluded_extents);
8375         goto again;
8376 }
8377
8378 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8379                            struct btrfs_root *root, int overwrite)
8380 {
8381         struct extent_buffer *c;
8382         struct extent_buffer *old = root->node;
8383         int level;
8384         int ret;
8385         struct btrfs_disk_key disk_key = {0,0,0};
8386
8387         level = 0;
8388
8389         if (overwrite) {
8390                 c = old;
8391                 extent_buffer_get(c);
8392                 goto init;
8393         }
8394         c = btrfs_alloc_free_block(trans, root,
8395                                    root->nodesize,
8396                                    root->root_key.objectid,
8397                                    &disk_key, level, 0, 0);
8398         if (IS_ERR(c)) {
8399                 c = old;
8400                 extent_buffer_get(c);
8401                 overwrite = 1;
8402         }
8403 init:
8404         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8405         btrfs_set_header_level(c, level);
8406         btrfs_set_header_bytenr(c, c->start);
8407         btrfs_set_header_generation(c, trans->transid);
8408         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8409         btrfs_set_header_owner(c, root->root_key.objectid);
8410
8411         write_extent_buffer(c, root->fs_info->fsid,
8412                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8413
8414         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8415                             btrfs_header_chunk_tree_uuid(c),
8416                             BTRFS_UUID_SIZE);
8417
8418         btrfs_mark_buffer_dirty(c);
8419         /*
8420          * this case can happen in the following case:
8421          *
8422          * 1.overwrite previous root.
8423          *
8424          * 2.reinit reloc data root, this is because we skip pin
8425          * down reloc data tree before which means we can allocate
8426          * same block bytenr here.
8427          */
8428         if (old->start == c->start) {
8429                 btrfs_set_root_generation(&root->root_item,
8430                                           trans->transid);
8431                 root->root_item.level = btrfs_header_level(root->node);
8432                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8433                                         &root->root_key, &root->root_item);
8434                 if (ret) {
8435                         free_extent_buffer(c);
8436                         return ret;
8437                 }
8438         }
8439         free_extent_buffer(old);
8440         root->node = c;
8441         add_root_to_dirty_list(root);
8442         return 0;
8443 }
8444
8445 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8446                                 struct extent_buffer *eb, int tree_root)
8447 {
8448         struct extent_buffer *tmp;
8449         struct btrfs_root_item *ri;
8450         struct btrfs_key key;
8451         u64 bytenr;
8452         u32 nodesize;
8453         int level = btrfs_header_level(eb);
8454         int nritems;
8455         int ret;
8456         int i;
8457
8458         /*
8459          * If we have pinned this block before, don't pin it again.
8460          * This can not only avoid forever loop with broken filesystem
8461          * but also give us some speedups.
8462          */
8463         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8464                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8465                 return 0;
8466
8467         btrfs_pin_extent(fs_info, eb->start, eb->len);
8468
8469         nodesize = btrfs_super_nodesize(fs_info->super_copy);
8470         nritems = btrfs_header_nritems(eb);
8471         for (i = 0; i < nritems; i++) {
8472                 if (level == 0) {
8473                         btrfs_item_key_to_cpu(eb, &key, i);
8474                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8475                                 continue;
8476                         /* Skip the extent root and reloc roots */
8477                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8478                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8479                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8480                                 continue;
8481                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8482                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8483
8484                         /*
8485                          * If at any point we start needing the real root we
8486                          * will have to build a stump root for the root we are
8487                          * in, but for now this doesn't actually use the root so
8488                          * just pass in extent_root.
8489                          */
8490                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8491                                               nodesize, 0);
8492                         if (!extent_buffer_uptodate(tmp)) {
8493                                 fprintf(stderr, "Error reading root block\n");
8494                                 return -EIO;
8495                         }
8496                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8497                         free_extent_buffer(tmp);
8498                         if (ret)
8499                                 return ret;
8500                 } else {
8501                         bytenr = btrfs_node_blockptr(eb, i);
8502
8503                         /* If we aren't the tree root don't read the block */
8504                         if (level == 1 && !tree_root) {
8505                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
8506                                 continue;
8507                         }
8508
8509                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8510                                               nodesize, 0);
8511                         if (!extent_buffer_uptodate(tmp)) {
8512                                 fprintf(stderr, "Error reading tree block\n");
8513                                 return -EIO;
8514                         }
8515                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8516                         free_extent_buffer(tmp);
8517                         if (ret)
8518                                 return ret;
8519                 }
8520         }
8521
8522         return 0;
8523 }
8524
8525 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8526 {
8527         int ret;
8528
8529         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8530         if (ret)
8531                 return ret;
8532
8533         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8534 }
8535
8536 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8537 {
8538         struct btrfs_block_group_cache *cache;
8539         struct btrfs_path *path;
8540         struct extent_buffer *leaf;
8541         struct btrfs_chunk *chunk;
8542         struct btrfs_key key;
8543         int ret;
8544         u64 start;
8545
8546         path = btrfs_alloc_path();
8547         if (!path)
8548                 return -ENOMEM;
8549
8550         key.objectid = 0;
8551         key.type = BTRFS_CHUNK_ITEM_KEY;
8552         key.offset = 0;
8553
8554         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8555         if (ret < 0) {
8556                 btrfs_free_path(path);
8557                 return ret;
8558         }
8559
8560         /*
8561          * We do this in case the block groups were screwed up and had alloc
8562          * bits that aren't actually set on the chunks.  This happens with
8563          * restored images every time and could happen in real life I guess.
8564          */
8565         fs_info->avail_data_alloc_bits = 0;
8566         fs_info->avail_metadata_alloc_bits = 0;
8567         fs_info->avail_system_alloc_bits = 0;
8568
8569         /* First we need to create the in-memory block groups */
8570         while (1) {
8571                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8572                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
8573                         if (ret < 0) {
8574                                 btrfs_free_path(path);
8575                                 return ret;
8576                         }
8577                         if (ret) {
8578                                 ret = 0;
8579                                 break;
8580                         }
8581                 }
8582                 leaf = path->nodes[0];
8583                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8584                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8585                         path->slots[0]++;
8586                         continue;
8587                 }
8588
8589                 chunk = btrfs_item_ptr(leaf, path->slots[0],
8590                                        struct btrfs_chunk);
8591                 btrfs_add_block_group(fs_info, 0,
8592                                       btrfs_chunk_type(leaf, chunk),
8593                                       key.objectid, key.offset,
8594                                       btrfs_chunk_length(leaf, chunk));
8595                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8596                                  key.offset + btrfs_chunk_length(leaf, chunk),
8597                                  GFP_NOFS);
8598                 path->slots[0]++;
8599         }
8600         start = 0;
8601         while (1) {
8602                 cache = btrfs_lookup_first_block_group(fs_info, start);
8603                 if (!cache)
8604                         break;
8605                 cache->cached = 1;
8606                 start = cache->key.objectid + cache->key.offset;
8607         }
8608
8609         btrfs_free_path(path);
8610         return 0;
8611 }
8612
8613 static int reset_balance(struct btrfs_trans_handle *trans,
8614                          struct btrfs_fs_info *fs_info)
8615 {
8616         struct btrfs_root *root = fs_info->tree_root;
8617         struct btrfs_path *path;
8618         struct extent_buffer *leaf;
8619         struct btrfs_key key;
8620         int del_slot, del_nr = 0;
8621         int ret;
8622         int found = 0;
8623
8624         path = btrfs_alloc_path();
8625         if (!path)
8626                 return -ENOMEM;
8627
8628         key.objectid = BTRFS_BALANCE_OBJECTID;
8629         key.type = BTRFS_BALANCE_ITEM_KEY;
8630         key.offset = 0;
8631
8632         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8633         if (ret) {
8634                 if (ret > 0)
8635                         ret = 0;
8636                 if (!ret)
8637                         goto reinit_data_reloc;
8638                 else
8639                         goto out;
8640         }
8641
8642         ret = btrfs_del_item(trans, root, path);
8643         if (ret)
8644                 goto out;
8645         btrfs_release_path(path);
8646
8647         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8648         key.type = BTRFS_ROOT_ITEM_KEY;
8649         key.offset = 0;
8650
8651         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8652         if (ret < 0)
8653                 goto out;
8654         while (1) {
8655                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8656                         if (!found)
8657                                 break;
8658
8659                         if (del_nr) {
8660                                 ret = btrfs_del_items(trans, root, path,
8661                                                       del_slot, del_nr);
8662                                 del_nr = 0;
8663                                 if (ret)
8664                                         goto out;
8665                         }
8666                         key.offset++;
8667                         btrfs_release_path(path);
8668
8669                         found = 0;
8670                         ret = btrfs_search_slot(trans, root, &key, path,
8671                                                 -1, 1);
8672                         if (ret < 0)
8673                                 goto out;
8674                         continue;
8675                 }
8676                 found = 1;
8677                 leaf = path->nodes[0];
8678                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8679                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8680                         break;
8681                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8682                         path->slots[0]++;
8683                         continue;
8684                 }
8685                 if (!del_nr) {
8686                         del_slot = path->slots[0];
8687                         del_nr = 1;
8688                 } else {
8689                         del_nr++;
8690                 }
8691                 path->slots[0]++;
8692         }
8693
8694         if (del_nr) {
8695                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8696                 if (ret)
8697                         goto out;
8698         }
8699         btrfs_release_path(path);
8700
8701 reinit_data_reloc:
8702         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8703         key.type = BTRFS_ROOT_ITEM_KEY;
8704         key.offset = (u64)-1;
8705         root = btrfs_read_fs_root(fs_info, &key);
8706         if (IS_ERR(root)) {
8707                 fprintf(stderr, "Error reading data reloc tree\n");
8708                 ret = PTR_ERR(root);
8709                 goto out;
8710         }
8711         record_root_in_trans(trans, root);
8712         ret = btrfs_fsck_reinit_root(trans, root, 0);
8713         if (ret)
8714                 goto out;
8715         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8716 out:
8717         btrfs_free_path(path);
8718         return ret;
8719 }
8720
8721 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8722                               struct btrfs_fs_info *fs_info)
8723 {
8724         u64 start = 0;
8725         int ret;
8726
8727         /*
8728          * The only reason we don't do this is because right now we're just
8729          * walking the trees we find and pinning down their bytes, we don't look
8730          * at any of the leaves.  In order to do mixed groups we'd have to check
8731          * the leaves of any fs roots and pin down the bytes for any file
8732          * extents we find.  Not hard but why do it if we don't have to?
8733          */
8734         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8735                 fprintf(stderr, "We don't support re-initing the extent tree "
8736                         "for mixed block groups yet, please notify a btrfs "
8737                         "developer you want to do this so they can add this "
8738                         "functionality.\n");
8739                 return -EINVAL;
8740         }
8741
8742         /*
8743          * first we need to walk all of the trees except the extent tree and pin
8744          * down the bytes that are in use so we don't overwrite any existing
8745          * metadata.
8746          */
8747         ret = pin_metadata_blocks(fs_info);
8748         if (ret) {
8749                 fprintf(stderr, "error pinning down used bytes\n");
8750                 return ret;
8751         }
8752
8753         /*
8754          * Need to drop all the block groups since we're going to recreate all
8755          * of them again.
8756          */
8757         btrfs_free_block_groups(fs_info);
8758         ret = reset_block_groups(fs_info);
8759         if (ret) {
8760                 fprintf(stderr, "error resetting the block groups\n");
8761                 return ret;
8762         }
8763
8764         /* Ok we can allocate now, reinit the extent root */
8765         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8766         if (ret) {
8767                 fprintf(stderr, "extent root initialization failed\n");
8768                 /*
8769                  * When the transaction code is updated we should end the
8770                  * transaction, but for now progs only knows about commit so
8771                  * just return an error.
8772                  */
8773                 return ret;
8774         }
8775
8776         /*
8777          * Now we have all the in-memory block groups setup so we can make
8778          * allocations properly, and the metadata we care about is safe since we
8779          * pinned all of it above.
8780          */
8781         while (1) {
8782                 struct btrfs_block_group_cache *cache;
8783
8784                 cache = btrfs_lookup_first_block_group(fs_info, start);
8785                 if (!cache)
8786                         break;
8787                 start = cache->key.objectid + cache->key.offset;
8788                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8789                                         &cache->key, &cache->item,
8790                                         sizeof(cache->item));
8791                 if (ret) {
8792                         fprintf(stderr, "Error adding block group\n");
8793                         return ret;
8794                 }
8795                 btrfs_extent_post_op(trans, fs_info->extent_root);
8796         }
8797
8798         ret = reset_balance(trans, fs_info);
8799         if (ret)
8800                 fprintf(stderr, "error reseting the pending balance\n");
8801
8802         return ret;
8803 }
8804
8805 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8806 {
8807         struct btrfs_path *path;
8808         struct btrfs_trans_handle *trans;
8809         struct btrfs_key key;
8810         int ret;
8811
8812         printf("Recowing metadata block %llu\n", eb->start);
8813         key.objectid = btrfs_header_owner(eb);
8814         key.type = BTRFS_ROOT_ITEM_KEY;
8815         key.offset = (u64)-1;
8816
8817         root = btrfs_read_fs_root(root->fs_info, &key);
8818         if (IS_ERR(root)) {
8819                 fprintf(stderr, "Couldn't find owner root %llu\n",
8820                         key.objectid);
8821                 return PTR_ERR(root);
8822         }
8823
8824         path = btrfs_alloc_path();
8825         if (!path)
8826                 return -ENOMEM;
8827
8828         trans = btrfs_start_transaction(root, 1);
8829         if (IS_ERR(trans)) {
8830                 btrfs_free_path(path);
8831                 return PTR_ERR(trans);
8832         }
8833
8834         path->lowest_level = btrfs_header_level(eb);
8835         if (path->lowest_level)
8836                 btrfs_node_key_to_cpu(eb, &key, 0);
8837         else
8838                 btrfs_item_key_to_cpu(eb, &key, 0);
8839
8840         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8841         btrfs_commit_transaction(trans, root);
8842         btrfs_free_path(path);
8843         return ret;
8844 }
8845
8846 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8847 {
8848         struct btrfs_path *path;
8849         struct btrfs_trans_handle *trans;
8850         struct btrfs_key key;
8851         int ret;
8852
8853         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8854                bad->key.type, bad->key.offset);
8855         key.objectid = bad->root_id;
8856         key.type = BTRFS_ROOT_ITEM_KEY;
8857         key.offset = (u64)-1;
8858
8859         root = btrfs_read_fs_root(root->fs_info, &key);
8860         if (IS_ERR(root)) {
8861                 fprintf(stderr, "Couldn't find owner root %llu\n",
8862                         key.objectid);
8863                 return PTR_ERR(root);
8864         }
8865
8866         path = btrfs_alloc_path();
8867         if (!path)
8868                 return -ENOMEM;
8869
8870         trans = btrfs_start_transaction(root, 1);
8871         if (IS_ERR(trans)) {
8872                 btrfs_free_path(path);
8873                 return PTR_ERR(trans);
8874         }
8875
8876         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8877         if (ret) {
8878                 if (ret > 0)
8879                         ret = 0;
8880                 goto out;
8881         }
8882         ret = btrfs_del_item(trans, root, path);
8883 out:
8884         btrfs_commit_transaction(trans, root);
8885         btrfs_free_path(path);
8886         return ret;
8887 }
8888
8889 static int zero_log_tree(struct btrfs_root *root)
8890 {
8891         struct btrfs_trans_handle *trans;
8892         int ret;
8893
8894         trans = btrfs_start_transaction(root, 1);
8895         if (IS_ERR(trans)) {
8896                 ret = PTR_ERR(trans);
8897                 return ret;
8898         }
8899         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8900         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8901         ret = btrfs_commit_transaction(trans, root);
8902         return ret;
8903 }
8904
8905 static int populate_csum(struct btrfs_trans_handle *trans,
8906                          struct btrfs_root *csum_root, char *buf, u64 start,
8907                          u64 len)
8908 {
8909         u64 offset = 0;
8910         u64 sectorsize;
8911         int ret = 0;
8912
8913         while (offset < len) {
8914                 sectorsize = csum_root->sectorsize;
8915                 ret = read_extent_data(csum_root, buf, start + offset,
8916                                        &sectorsize, 0);
8917                 if (ret)
8918                         break;
8919                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8920                                             start + offset, buf, sectorsize);
8921                 if (ret)
8922                         break;
8923                 offset += sectorsize;
8924         }
8925         return ret;
8926 }
8927
8928 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8929                                       struct btrfs_root *csum_root,
8930                                       struct btrfs_root *cur_root)
8931 {
8932         struct btrfs_path *path;
8933         struct btrfs_key key;
8934         struct extent_buffer *node;
8935         struct btrfs_file_extent_item *fi;
8936         char *buf = NULL;
8937         u64 start = 0;
8938         u64 len = 0;
8939         int slot = 0;
8940         int ret = 0;
8941
8942         path = btrfs_alloc_path();
8943         if (!path)
8944                 return -ENOMEM;
8945         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8946         if (!buf) {
8947                 ret = -ENOMEM;
8948                 goto out;
8949         }
8950
8951         key.objectid = 0;
8952         key.offset = 0;
8953         key.type = 0;
8954
8955         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8956         if (ret < 0)
8957                 goto out;
8958         /* Iterate all regular file extents and fill its csum */
8959         while (1) {
8960                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8961
8962                 if (key.type != BTRFS_EXTENT_DATA_KEY)
8963                         goto next;
8964                 node = path->nodes[0];
8965                 slot = path->slots[0];
8966                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8967                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8968                         goto next;
8969                 start = btrfs_file_extent_disk_bytenr(node, fi);
8970                 len = btrfs_file_extent_disk_num_bytes(node, fi);
8971
8972                 ret = populate_csum(trans, csum_root, buf, start, len);
8973                 if (ret == -EEXIST)
8974                         ret = 0;
8975                 if (ret < 0)
8976                         goto out;
8977 next:
8978                 /*
8979                  * TODO: if next leaf is corrupted, jump to nearest next valid
8980                  * leaf.
8981                  */
8982                 ret = btrfs_next_item(cur_root, path);
8983                 if (ret < 0)
8984                         goto out;
8985                 if (ret > 0) {
8986                         ret = 0;
8987                         goto out;
8988                 }
8989         }
8990
8991 out:
8992         btrfs_free_path(path);
8993         free(buf);
8994         return ret;
8995 }
8996
8997 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8998                                   struct btrfs_root *csum_root)
8999 {
9000         struct btrfs_fs_info *fs_info = csum_root->fs_info;
9001         struct btrfs_path *path;
9002         struct btrfs_root *tree_root = fs_info->tree_root;
9003         struct btrfs_root *cur_root;
9004         struct extent_buffer *node;
9005         struct btrfs_key key;
9006         int slot = 0;
9007         int ret = 0;
9008
9009         path = btrfs_alloc_path();
9010         if (!path)
9011                 return -ENOMEM;
9012
9013         key.objectid = BTRFS_FS_TREE_OBJECTID;
9014         key.offset = 0;
9015         key.type = BTRFS_ROOT_ITEM_KEY;
9016
9017         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
9018         if (ret < 0)
9019                 goto out;
9020         if (ret > 0) {
9021                 ret = -ENOENT;
9022                 goto out;
9023         }
9024
9025         while (1) {
9026                 node = path->nodes[0];
9027                 slot = path->slots[0];
9028                 btrfs_item_key_to_cpu(node, &key, slot);
9029                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
9030                         goto out;
9031                 if (key.type != BTRFS_ROOT_ITEM_KEY)
9032                         goto next;
9033                 if (!is_fstree(key.objectid))
9034                         goto next;
9035                 key.offset = (u64)-1;
9036
9037                 cur_root = btrfs_read_fs_root(fs_info, &key);
9038                 if (IS_ERR(cur_root) || !cur_root) {
9039                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
9040                                 key.objectid);
9041                         goto out;
9042                 }
9043                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
9044                                 cur_root);
9045                 if (ret < 0)
9046                         goto out;
9047 next:
9048                 ret = btrfs_next_item(tree_root, path);
9049                 if (ret > 0) {
9050                         ret = 0;
9051                         goto out;
9052                 }
9053                 if (ret < 0)
9054                         goto out;
9055         }
9056
9057 out:
9058         btrfs_free_path(path);
9059         return ret;
9060 }
9061
9062 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
9063                                       struct btrfs_root *csum_root)
9064 {
9065         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
9066         struct btrfs_path *path;
9067         struct btrfs_extent_item *ei;
9068         struct extent_buffer *leaf;
9069         char *buf;
9070         struct btrfs_key key;
9071         int ret;
9072
9073         path = btrfs_alloc_path();
9074         if (!path)
9075                 return -ENOMEM;
9076
9077         key.objectid = 0;
9078         key.type = BTRFS_EXTENT_ITEM_KEY;
9079         key.offset = 0;
9080
9081         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9082         if (ret < 0) {
9083                 btrfs_free_path(path);
9084                 return ret;
9085         }
9086
9087         buf = malloc(csum_root->sectorsize);
9088         if (!buf) {
9089                 btrfs_free_path(path);
9090                 return -ENOMEM;
9091         }
9092
9093         while (1) {
9094                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9095                         ret = btrfs_next_leaf(extent_root, path);
9096                         if (ret < 0)
9097                                 break;
9098                         if (ret) {
9099                                 ret = 0;
9100                                 break;
9101                         }
9102                 }
9103                 leaf = path->nodes[0];
9104
9105                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9106                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9107                         path->slots[0]++;
9108                         continue;
9109                 }
9110
9111                 ei = btrfs_item_ptr(leaf, path->slots[0],
9112                                     struct btrfs_extent_item);
9113                 if (!(btrfs_extent_flags(leaf, ei) &
9114                       BTRFS_EXTENT_FLAG_DATA)) {
9115                         path->slots[0]++;
9116                         continue;
9117                 }
9118
9119                 ret = populate_csum(trans, csum_root, buf, key.objectid,
9120                                     key.offset);
9121                 if (ret)
9122                         break;
9123                 path->slots[0]++;
9124         }
9125
9126         btrfs_free_path(path);
9127         free(buf);
9128         return ret;
9129 }
9130
9131 /*
9132  * Recalculate the csum and put it into the csum tree.
9133  *
9134  * Extent tree init will wipe out all the extent info, so in that case, we
9135  * can't depend on extent tree, but use fs tree.  If search_fs_tree is set, we
9136  * will use fs/subvol trees to init the csum tree.
9137  */
9138 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9139                           struct btrfs_root *csum_root,
9140                           int search_fs_tree)
9141 {
9142         if (search_fs_tree)
9143                 return fill_csum_tree_from_fs(trans, csum_root);
9144         else
9145                 return fill_csum_tree_from_extent(trans, csum_root);
9146 }
9147
9148 static void free_roots_info_cache(void)
9149 {
9150         if (!roots_info_cache)
9151                 return;
9152
9153         while (!cache_tree_empty(roots_info_cache)) {
9154                 struct cache_extent *entry;
9155                 struct root_item_info *rii;
9156
9157                 entry = first_cache_extent(roots_info_cache);
9158                 if (!entry)
9159                         break;
9160                 remove_cache_extent(roots_info_cache, entry);
9161                 rii = container_of(entry, struct root_item_info, cache_extent);
9162                 free(rii);
9163         }
9164
9165         free(roots_info_cache);
9166         roots_info_cache = NULL;
9167 }
9168
9169 static int build_roots_info_cache(struct btrfs_fs_info *info)
9170 {
9171         int ret = 0;
9172         struct btrfs_key key;
9173         struct extent_buffer *leaf;
9174         struct btrfs_path *path;
9175
9176         if (!roots_info_cache) {
9177                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9178                 if (!roots_info_cache)
9179                         return -ENOMEM;
9180                 cache_tree_init(roots_info_cache);
9181         }
9182
9183         path = btrfs_alloc_path();
9184         if (!path)
9185                 return -ENOMEM;
9186
9187         key.objectid = 0;
9188         key.type = BTRFS_EXTENT_ITEM_KEY;
9189         key.offset = 0;
9190
9191         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9192         if (ret < 0)
9193                 goto out;
9194         leaf = path->nodes[0];
9195
9196         while (1) {
9197                 struct btrfs_key found_key;
9198                 struct btrfs_extent_item *ei;
9199                 struct btrfs_extent_inline_ref *iref;
9200                 int slot = path->slots[0];
9201                 int type;
9202                 u64 flags;
9203                 u64 root_id;
9204                 u8 level;
9205                 struct cache_extent *entry;
9206                 struct root_item_info *rii;
9207
9208                 if (slot >= btrfs_header_nritems(leaf)) {
9209                         ret = btrfs_next_leaf(info->extent_root, path);
9210                         if (ret < 0) {
9211                                 break;
9212                         } else if (ret) {
9213                                 ret = 0;
9214                                 break;
9215                         }
9216                         leaf = path->nodes[0];
9217                         slot = path->slots[0];
9218                 }
9219
9220                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9221
9222                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9223                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9224                         goto next;
9225
9226                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9227                 flags = btrfs_extent_flags(leaf, ei);
9228
9229                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9230                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9231                         goto next;
9232
9233                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9234                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9235                         level = found_key.offset;
9236                 } else {
9237                         struct btrfs_tree_block_info *binfo;
9238
9239                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
9240                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
9241                         level = btrfs_tree_block_level(leaf, binfo);
9242                 }
9243
9244                 /*
9245                  * For a root extent, it must be of the following type and the
9246                  * first (and only one) iref in the item.
9247                  */
9248                 type = btrfs_extent_inline_ref_type(leaf, iref);
9249                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9250                         goto next;
9251
9252                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9253                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9254                 if (!entry) {
9255                         rii = malloc(sizeof(struct root_item_info));
9256                         if (!rii) {
9257                                 ret = -ENOMEM;
9258                                 goto out;
9259                         }
9260                         rii->cache_extent.start = root_id;
9261                         rii->cache_extent.size = 1;
9262                         rii->level = (u8)-1;
9263                         entry = &rii->cache_extent;
9264                         ret = insert_cache_extent(roots_info_cache, entry);
9265                         ASSERT(ret == 0);
9266                 } else {
9267                         rii = container_of(entry, struct root_item_info,
9268                                            cache_extent);
9269                 }
9270
9271                 ASSERT(rii->cache_extent.start == root_id);
9272                 ASSERT(rii->cache_extent.size == 1);
9273
9274                 if (level > rii->level || rii->level == (u8)-1) {
9275                         rii->level = level;
9276                         rii->bytenr = found_key.objectid;
9277                         rii->gen = btrfs_extent_generation(leaf, ei);
9278                         rii->node_count = 1;
9279                 } else if (level == rii->level) {
9280                         rii->node_count++;
9281                 }
9282 next:
9283                 path->slots[0]++;
9284         }
9285
9286 out:
9287         btrfs_free_path(path);
9288
9289         return ret;
9290 }
9291
9292 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9293                                   struct btrfs_path *path,
9294                                   const struct btrfs_key *root_key,
9295                                   const int read_only_mode)
9296 {
9297         const u64 root_id = root_key->objectid;
9298         struct cache_extent *entry;
9299         struct root_item_info *rii;
9300         struct btrfs_root_item ri;
9301         unsigned long offset;
9302
9303         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9304         if (!entry) {
9305                 fprintf(stderr,
9306                         "Error: could not find extent items for root %llu\n",
9307                         root_key->objectid);
9308                 return -ENOENT;
9309         }
9310
9311         rii = container_of(entry, struct root_item_info, cache_extent);
9312         ASSERT(rii->cache_extent.start == root_id);
9313         ASSERT(rii->cache_extent.size == 1);
9314
9315         if (rii->node_count != 1) {
9316                 fprintf(stderr,
9317                         "Error: could not find btree root extent for root %llu\n",
9318                         root_id);
9319                 return -ENOENT;
9320         }
9321
9322         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9323         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9324
9325         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9326             btrfs_root_level(&ri) != rii->level ||
9327             btrfs_root_generation(&ri) != rii->gen) {
9328
9329                 /*
9330                  * If we're in repair mode but our caller told us to not update
9331                  * the root item, i.e. just check if it needs to be updated, don't
9332                  * print this message, since the caller will call us again shortly
9333                  * for the same root item without read only mode (the caller will
9334                  * open a transaction first).
9335                  */
9336                 if (!(read_only_mode && repair))
9337                         fprintf(stderr,
9338                                 "%sroot item for root %llu,"
9339                                 " current bytenr %llu, current gen %llu, current level %u,"
9340                                 " new bytenr %llu, new gen %llu, new level %u\n",
9341                                 (read_only_mode ? "" : "fixing "),
9342                                 root_id,
9343                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9344                                 btrfs_root_level(&ri),
9345                                 rii->bytenr, rii->gen, rii->level);
9346
9347                 if (btrfs_root_generation(&ri) > rii->gen) {
9348                         fprintf(stderr,
9349                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9350                                 root_id, btrfs_root_generation(&ri), rii->gen);
9351                         return -EINVAL;
9352                 }
9353
9354                 if (!read_only_mode) {
9355                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9356                         btrfs_set_root_level(&ri, rii->level);
9357                         btrfs_set_root_generation(&ri, rii->gen);
9358                         write_extent_buffer(path->nodes[0], &ri,
9359                                             offset, sizeof(ri));
9360                 }
9361
9362                 return 1;
9363         }
9364
9365         return 0;
9366 }
9367
9368 /*
9369  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9370  * caused read-only snapshots to be corrupted if they were created at a moment
9371  * when the source subvolume/snapshot had orphan items. The issue was that the
9372  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9373  * node instead of the post orphan cleanup root node.
9374  * So this function, and its callees, just detects and fixes those cases. Even
9375  * though the regression was for read-only snapshots, this function applies to
9376  * any snapshot/subvolume root.
9377  * This must be run before any other repair code - not doing it so, makes other
9378  * repair code delete or modify backrefs in the extent tree for example, which
9379  * will result in an inconsistent fs after repairing the root items.
9380  */
9381 static int repair_root_items(struct btrfs_fs_info *info)
9382 {
9383         struct btrfs_path *path = NULL;
9384         struct btrfs_key key;
9385         struct extent_buffer *leaf;
9386         struct btrfs_trans_handle *trans = NULL;
9387         int ret = 0;
9388         int bad_roots = 0;
9389         int need_trans = 0;
9390
9391         ret = build_roots_info_cache(info);
9392         if (ret)
9393                 goto out;
9394
9395         path = btrfs_alloc_path();
9396         if (!path) {
9397                 ret = -ENOMEM;
9398                 goto out;
9399         }
9400
9401         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9402         key.type = BTRFS_ROOT_ITEM_KEY;
9403         key.offset = 0;
9404
9405 again:
9406         /*
9407          * Avoid opening and committing transactions if a leaf doesn't have
9408          * any root items that need to be fixed, so that we avoid rotating
9409          * backup roots unnecessarily.
9410          */
9411         if (need_trans) {
9412                 trans = btrfs_start_transaction(info->tree_root, 1);
9413                 if (IS_ERR(trans)) {
9414                         ret = PTR_ERR(trans);
9415                         goto out;
9416                 }
9417         }
9418
9419         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9420                                 0, trans ? 1 : 0);
9421         if (ret < 0)
9422                 goto out;
9423         leaf = path->nodes[0];
9424
9425         while (1) {
9426                 struct btrfs_key found_key;
9427
9428                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9429                         int no_more_keys = find_next_key(path, &key);
9430
9431                         btrfs_release_path(path);
9432                         if (trans) {
9433                                 ret = btrfs_commit_transaction(trans,
9434                                                                info->tree_root);
9435                                 trans = NULL;
9436                                 if (ret < 0)
9437                                         goto out;
9438                         }
9439                         need_trans = 0;
9440                         if (no_more_keys)
9441                                 break;
9442                         goto again;
9443                 }
9444
9445                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9446
9447                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9448                         goto next;
9449                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9450                         goto next;
9451
9452                 ret = maybe_repair_root_item(info, path, &found_key,
9453                                              trans ? 0 : 1);
9454                 if (ret < 0)
9455                         goto out;
9456                 if (ret) {
9457                         if (!trans && repair) {
9458                                 need_trans = 1;
9459                                 key = found_key;
9460                                 btrfs_release_path(path);
9461                                 goto again;
9462                         }
9463                         bad_roots++;
9464                 }
9465 next:
9466                 path->slots[0]++;
9467         }
9468         ret = 0;
9469 out:
9470         free_roots_info_cache();
9471         btrfs_free_path(path);
9472         if (trans)
9473                 btrfs_commit_transaction(trans, info->tree_root);
9474         if (ret < 0)
9475                 return ret;
9476
9477         return bad_roots;
9478 }
9479
9480 const char * const cmd_check_usage[] = {
9481         "btrfs check [options] <device>",
9482         "Check structural inegrity of a filesystem (unmounted).",
9483         "Check structural inegrity of an unmounted filesystem. Verify internal",
9484         "trees' consistency and item connectivity. In the repair mode try to",
9485         "fix the problems found.",
9486         "WARNING: the repair mode is considered dangerous",
9487         "",
9488         "-s|--super <superblock>     use this superblock copy",
9489         "-b|--backup                 use the first valid backup root copy",
9490         "--repair                    try to repair the filesystem",
9491         "--readonly                  run in read-only mode (default)",
9492         "--init-csum-tree            create a new CRC tree",
9493         "--init-extent-tree          create a new extent tree",
9494         "--check-data-csum           verify checkums of data blocks",
9495         "-Q|--qgroup-report           print a report on qgroup consistency",
9496         "-E|--subvol-extents <subvolid>",
9497         "                            print subvolume extents and sharing state",
9498         "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
9499         "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
9500         "-p|--progress               indicate progress",
9501         NULL
9502 };
9503
9504 int cmd_check(int argc, char **argv)
9505 {
9506         struct cache_tree root_cache;
9507         struct btrfs_root *root;
9508         struct btrfs_fs_info *info;
9509         u64 bytenr = 0;
9510         u64 subvolid = 0;
9511         u64 tree_root_bytenr = 0;
9512         u64 chunk_root_bytenr = 0;
9513         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9514         int ret;
9515         u64 num;
9516         int init_csum_tree = 0;
9517         int readonly = 0;
9518         int qgroup_report = 0;
9519         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9520
9521         while(1) {
9522                 int c;
9523                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
9524                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
9525                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE };
9526                 static const struct option long_options[] = {
9527                         { "super", required_argument, NULL, 's' },
9528                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
9529                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
9530                         { "init-csum-tree", no_argument, NULL,
9531                                 GETOPT_VAL_INIT_CSUM },
9532                         { "init-extent-tree", no_argument, NULL,
9533                                 GETOPT_VAL_INIT_EXTENT },
9534                         { "check-data-csum", no_argument, NULL,
9535                                 GETOPT_VAL_CHECK_CSUM },
9536                         { "backup", no_argument, NULL, 'b' },
9537                         { "subvol-extents", required_argument, NULL, 'E' },
9538                         { "qgroup-report", no_argument, NULL, 'Q' },
9539                         { "tree-root", required_argument, NULL, 'r' },
9540                         { "chunk-root", required_argument, NULL,
9541                                 GETOPT_VAL_CHUNK_TREE },
9542                         { "progress", no_argument, NULL, 'p' },
9543                         { NULL, 0, NULL, 0}
9544                 };
9545
9546                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9547                 if (c < 0)
9548                         break;
9549                 switch(c) {
9550                         case 'a': /* ignored */ break;
9551                         case 'b':
9552                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9553                                 break;
9554                         case 's':
9555                                 num = arg_strtou64(optarg);
9556                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9557                                         fprintf(stderr,
9558                                                 "ERROR: super mirror should be less than: %d\n",
9559                                                 BTRFS_SUPER_MIRROR_MAX);
9560                                         exit(1);
9561                                 }
9562                                 bytenr = btrfs_sb_offset(((int)num));
9563                                 printf("using SB copy %llu, bytenr %llu\n", num,
9564                                        (unsigned long long)bytenr);
9565                                 break;
9566                         case 'Q':
9567                                 qgroup_report = 1;
9568                                 break;
9569                         case 'E':
9570                                 subvolid = arg_strtou64(optarg);
9571                                 break;
9572                         case 'r':
9573                                 tree_root_bytenr = arg_strtou64(optarg);
9574                                 break;
9575                         case GETOPT_VAL_CHUNK_TREE:
9576                                 chunk_root_bytenr = arg_strtou64(optarg);
9577                                 break;
9578                         case 'p':
9579                                 ctx.progress_enabled = true;
9580                                 break;
9581                         case '?':
9582                         case 'h':
9583                                 usage(cmd_check_usage);
9584                         case GETOPT_VAL_REPAIR:
9585                                 printf("enabling repair mode\n");
9586                                 repair = 1;
9587                                 ctree_flags |= OPEN_CTREE_WRITES;
9588                                 break;
9589                         case GETOPT_VAL_READONLY:
9590                                 readonly = 1;
9591                                 break;
9592                         case GETOPT_VAL_INIT_CSUM:
9593                                 printf("Creating a new CRC tree\n");
9594                                 init_csum_tree = 1;
9595                                 repair = 1;
9596                                 ctree_flags |= OPEN_CTREE_WRITES;
9597                                 break;
9598                         case GETOPT_VAL_INIT_EXTENT:
9599                                 init_extent_tree = 1;
9600                                 ctree_flags |= (OPEN_CTREE_WRITES |
9601                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9602                                 repair = 1;
9603                                 break;
9604                         case GETOPT_VAL_CHECK_CSUM:
9605                                 check_data_csum = 1;
9606                                 break;
9607                 }
9608         }
9609
9610         if (check_argc_exact(argc - optind, 1))
9611                 usage(cmd_check_usage);
9612
9613         if (ctx.progress_enabled) {
9614                 ctx.tp = TASK_NOTHING;
9615                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9616         }
9617
9618         /* This check is the only reason for --readonly to exist */
9619         if (readonly && repair) {
9620                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9621                 exit(1);
9622         }
9623
9624         radix_tree_init();
9625         cache_tree_init(&root_cache);
9626
9627         if((ret = check_mounted(argv[optind])) < 0) {
9628                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9629                 goto err_out;
9630         } else if(ret) {
9631                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9632                 ret = -EBUSY;
9633                 goto err_out;
9634         }
9635
9636         /* only allow partial opening under repair mode */
9637         if (repair)
9638                 ctree_flags |= OPEN_CTREE_PARTIAL;
9639
9640         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9641                                   chunk_root_bytenr, ctree_flags);
9642         if (!info) {
9643                 fprintf(stderr, "Couldn't open file system\n");
9644                 ret = -EIO;
9645                 goto err_out;
9646         }
9647
9648         global_info = info;
9649         root = info->fs_root;
9650
9651         /*
9652          * repair mode will force us to commit transaction which
9653          * will make us fail to load log tree when mounting.
9654          */
9655         if (repair && btrfs_super_log_root(info->super_copy)) {
9656                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9657                 if (!ret) {
9658                         ret = 1;
9659                         goto close_out;
9660                 }
9661                 ret = zero_log_tree(root);
9662                 if (ret) {
9663                         fprintf(stderr, "fail to zero log tree\n");
9664                         goto close_out;
9665                 }
9666         }
9667
9668         uuid_unparse(info->super_copy->fsid, uuidbuf);
9669         if (qgroup_report) {
9670                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9671                        uuidbuf);
9672                 ret = qgroup_verify_all(info);
9673                 if (ret == 0)
9674                         ret = report_qgroups(1);
9675                 goto close_out;
9676         }
9677         if (subvolid) {
9678                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9679                        subvolid, argv[optind], uuidbuf);
9680                 ret = print_extent_state(info, subvolid);
9681                 goto close_out;
9682         }
9683         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9684
9685         if (!extent_buffer_uptodate(info->tree_root->node) ||
9686             !extent_buffer_uptodate(info->dev_root->node) ||
9687             !extent_buffer_uptodate(info->chunk_root->node)) {
9688                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9689                 ret = -EIO;
9690                 goto close_out;
9691         }
9692
9693         if (init_extent_tree || init_csum_tree) {
9694                 struct btrfs_trans_handle *trans;
9695
9696                 trans = btrfs_start_transaction(info->extent_root, 0);
9697                 if (IS_ERR(trans)) {
9698                         fprintf(stderr, "Error starting transaction\n");
9699                         ret = PTR_ERR(trans);
9700                         goto close_out;
9701                 }
9702
9703                 if (init_extent_tree) {
9704                         printf("Creating a new extent tree\n");
9705                         ret = reinit_extent_tree(trans, info);
9706                         if (ret)
9707                                 goto close_out;
9708                 }
9709
9710                 if (init_csum_tree) {
9711                         fprintf(stderr, "Reinit crc root\n");
9712                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9713                         if (ret) {
9714                                 fprintf(stderr, "crc root initialization failed\n");
9715                                 ret = -EIO;
9716                                 goto close_out;
9717                         }
9718
9719                         ret = fill_csum_tree(trans, info->csum_root,
9720                                              init_extent_tree);
9721                         if (ret) {
9722                                 fprintf(stderr, "crc refilling failed\n");
9723                                 return -EIO;
9724                         }
9725                 }
9726                 /*
9727                  * Ok now we commit and run the normal fsck, which will add
9728                  * extent entries for all of the items it finds.
9729                  */
9730                 ret = btrfs_commit_transaction(trans, info->extent_root);
9731                 if (ret)
9732                         goto close_out;
9733         }
9734         if (!extent_buffer_uptodate(info->extent_root->node)) {
9735                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9736                 ret = -EIO;
9737                 goto close_out;
9738         }
9739         if (!extent_buffer_uptodate(info->csum_root->node)) {
9740                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9741                 ret = -EIO;
9742                 goto close_out;
9743         }
9744
9745         if (!ctx.progress_enabled)
9746                 fprintf(stderr, "checking extents\n");
9747         ret = check_chunks_and_extents(root);
9748         if (ret)
9749                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9750
9751         ret = repair_root_items(info);
9752         if (ret < 0)
9753                 goto close_out;
9754         if (repair) {
9755                 fprintf(stderr, "Fixed %d roots.\n", ret);
9756                 ret = 0;
9757         } else if (ret > 0) {
9758                 fprintf(stderr,
9759                        "Found %d roots with an outdated root item.\n",
9760                        ret);
9761                 fprintf(stderr,
9762                         "Please run a filesystem check with the option --repair to fix them.\n");
9763                 ret = 1;
9764                 goto close_out;
9765         }
9766
9767         if (!ctx.progress_enabled) {
9768                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
9769                         fprintf(stderr, "checking free space tree\n");
9770                 else
9771                         fprintf(stderr, "checking free space cache\n");
9772         }
9773         ret = check_space_cache(root);
9774         if (ret)
9775                 goto out;
9776
9777         /*
9778          * We used to have to have these hole extents in between our real
9779          * extents so if we don't have this flag set we need to make sure there
9780          * are no gaps in the file extents for inodes, otherwise we can just
9781          * ignore it when this happens.
9782          */
9783         no_holes = btrfs_fs_incompat(root->fs_info,
9784                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9785         if (!ctx.progress_enabled)
9786                 fprintf(stderr, "checking fs roots\n");
9787         ret = check_fs_roots(root, &root_cache);
9788         if (ret)
9789                 goto out;
9790
9791         fprintf(stderr, "checking csums\n");
9792         ret = check_csums(root);
9793         if (ret)
9794                 goto out;
9795
9796         fprintf(stderr, "checking root refs\n");
9797         ret = check_root_refs(root, &root_cache);
9798         if (ret)
9799                 goto out;
9800
9801         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9802                 struct extent_buffer *eb;
9803
9804                 eb = list_first_entry(&root->fs_info->recow_ebs,
9805                                       struct extent_buffer, recow);
9806                 list_del_init(&eb->recow);
9807                 ret = recow_extent_buffer(root, eb);
9808                 if (ret)
9809                         break;
9810         }
9811
9812         while (!list_empty(&delete_items)) {
9813                 struct bad_item *bad;
9814
9815                 bad = list_first_entry(&delete_items, struct bad_item, list);
9816                 list_del_init(&bad->list);
9817                 if (repair)
9818                         ret = delete_bad_item(root, bad);
9819                 free(bad);
9820         }
9821
9822         if (info->quota_enabled) {
9823                 int err;
9824                 fprintf(stderr, "checking quota groups\n");
9825                 err = qgroup_verify_all(info);
9826                 if (err)
9827                         goto out;
9828         }
9829
9830         if (!list_empty(&root->fs_info->recow_ebs)) {
9831                 fprintf(stderr, "Transid errors in file system\n");
9832                 ret = 1;
9833         }
9834 out:
9835         /* Don't override original ret */
9836         if (ret)
9837                 report_qgroups(0);
9838         else
9839                 ret = report_qgroups(0);
9840         if (found_old_backref) { /*
9841                  * there was a disk format change when mixed
9842                  * backref was in testing tree. The old format
9843                  * existed about one week.
9844                  */
9845                 printf("\n * Found old mixed backref format. "
9846                        "The old format is not supported! *"
9847                        "\n * Please mount the FS in readonly mode, "
9848                        "backup data and re-format the FS. *\n\n");
9849                 ret = 1;
9850         }
9851         printf("found %llu bytes used err is %d\n",
9852                (unsigned long long)bytes_used, ret);
9853         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9854         printf("total tree bytes: %llu\n",
9855                (unsigned long long)total_btree_bytes);
9856         printf("total fs tree bytes: %llu\n",
9857                (unsigned long long)total_fs_tree_bytes);
9858         printf("total extent tree bytes: %llu\n",
9859                (unsigned long long)total_extent_tree_bytes);
9860         printf("btree space waste bytes: %llu\n",
9861                (unsigned long long)btree_space_waste);
9862         printf("file data blocks allocated: %llu\n referenced %llu\n",
9863                 (unsigned long long)data_bytes_allocated,
9864                 (unsigned long long)data_bytes_referenced);
9865
9866         free_root_recs_tree(&root_cache);
9867 close_out:
9868         close_ctree(root);
9869 err_out:
9870         if (ctx.progress_enabled)
9871                 task_deinit(ctx.info);
9872
9873         return ret;
9874 }