btrfs-progs: handle memory allocation failure in add_extent_rec
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "btrfsck.h"
39 #include "qgroup-verify.h"
40 #include "rbtree-utils.h"
41 #include "backref.h"
42 #include "ulist.h"
43
/*
 * Phase reported by the progress indicator.  print_status_check() uses the
 * value as an index into its table of phase names, and treats TASK_NOTHING
 * as "nothing to report".
 */
44 enum task_position {
45         TASK_EXTENTS,
46         TASK_FREE_SPACE,
47         TASK_FS_ROOTS,
48         TASK_NOTHING, /* have to be the last element */
49 };
50
/*
 * Context handed to the progress callbacks (print_status_check() receives
 * it through its void * argument).
 */
51 struct task_ctx {
52         int progress_enabled;
        /* phase whose name print_status_check() prints */
53         enum task_position tp;
54
        /* handle passed to task_period_start()/task_period_wait() */
55         struct task_info *info;
56 };
57
/*
 * File-scope state of the check command.  Within this part of the file only
 * no_holes is consulted (maybe_free_inode_rec() skips the file extent hole
 * check when it is set); the users of the remaining variables are outside
 * this view.
 */
58 static u64 bytes_used = 0;
59 static u64 total_csum_bytes = 0;
60 static u64 total_btree_bytes = 0;
61 static u64 total_fs_tree_bytes = 0;
62 static u64 total_extent_tree_bytes = 0;
63 static u64 btree_space_waste = 0;
64 static u64 data_bytes_allocated = 0;
65 static u64 data_bytes_referenced = 0;
66 static int found_old_backref = 0;
67 static LIST_HEAD(duplicate_extents);
68 static LIST_HEAD(delete_items);
69 static int repair = 0;
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75
76 static void *print_status_check(void *p)
77 {
78         struct task_ctx *priv = p;
79         const char work_indicator[] = { '.', 'o', 'O', 'o' };
80         uint32_t count = 0;
81         static char *task_position_string[] = {
82                 "checking extents",
83                 "checking free space cache",
84                 "checking fs roots",
85         };
86
87         task_period_start(priv->info, 1000 /* 1s */);
88
89         if (priv->tp == TASK_NOTHING)
90                 return NULL;
91
92         while (1) {
93                 printf("%s [%c]\r", task_position_string[priv->tp],
94                                 work_indicator[count % 4]);
95                 count++;
96                 fflush(stdout);
97                 task_period_wait(priv->info);
98         }
99         return NULL;
100 }
101
/*
 * Progress callback that terminates the status line written by
 * print_status_check(): emits a newline and flushes stdout.
 *
 * @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
109
/*
 * Common header of a back reference; embedded as the first member ("node")
 * of struct data_backref and struct tree_backref.
 */
110 struct extent_backref {
111         struct list_head list;
        /* one-bit state flags; they are set and tested outside this view */
112         unsigned int is_data:1;
113         unsigned int found_extent_tree:1;
114         unsigned int full_backref:1;
115         unsigned int found_ref:1;
116         unsigned int broken:1;
117 };
118
/* Back reference carrying data-extent details on top of the common header. */
119 struct data_backref {
120         struct extent_backref node;
        /* parent and root share storage; only one of them is meaningful */
121         union {
122                 u64 parent;
123                 u64 root;
124         };
125         u64 owner;
126         u64 offset;
127         u64 disk_bytenr;
128         u64 bytes;
129         u64 ram_bytes;
130         u32 num_refs;
131         u32 found_ref;
132 };
133
134 /*
135  * Much like data_backref, but with the undetermined members removed
136  * and linked through a list_head.
137  * During extent scan, it is stored in root->orphan_data_extent.
138  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
139  */
140 struct orphan_data_extent {
141         struct list_head list;
142         u64 root;
        /* printed as "inode" by print_orphan_data_extents() */
143         u64 objectid;
144         u64 offset;
145         u64 disk_bytenr;
146         u64 disk_len;
147 };
148
/* Back reference consisting of the common header plus a parent/root id. */
149 struct tree_backref {
150         struct extent_backref node;
        /* parent and root share storage; only one of them is meaningful */
151         union {
152                 u64 parent;
153                 u64 root;
154         };
155 };
156
/*
 * Bookkeeping record for one extent.  None of its users are visible in this
 * part of the file, so the notes below only describe the layout.
 */
157 struct extent_record {
        /* list heads and links */
158         struct list_head backrefs;
159         struct list_head dups;
160         struct list_head list;
        /* embedded cache_extent -- presumably indexes the record in a
         * cache_tree; confirm against the users */
161         struct cache_extent cache;
162         struct btrfs_disk_key parent_key;
163         u64 start;
164         u64 max_size;
165         u64 nr;
166         u64 refs;
167         u64 extent_item_refs;
168         u64 generation;
169         u64 parent_generation;
170         u64 info_objectid;
171         u32 num_duplicates;
172         u8 info_level;
173         int flag_block_full_backref;
        /* one-bit state flags */
174         unsigned int found_rec:1;
175         unsigned int content_checked:1;
176         unsigned int owner_ref_checked:1;
177         unsigned int is_root:1;
178         unsigned int metadata:1;
179         unsigned int bad_full_backref:1;
180         unsigned int crossing_stripes:1;
181         unsigned int wrong_chunk_type:1;
182 };
183
/*
 * One named reference to an inode, linked on inode_record.backrefs.
 * The name is stored inline after the fixed part: clone_inode_rec() copies
 * sizeof(struct) + namelen + 1 bytes per entry.
 */
184 struct inode_backref {
185         struct list_head list;
        /* which items have been seen for this reference */
186         unsigned int found_dir_item:1;
187         unsigned int found_dir_index:1;
188         unsigned int found_inode_ref:1;
        /* compared with imode_to_type(rec->imode) in maybe_free_inode_rec() */
189         unsigned int filetype:8;
        /* REF_ERR_* bits */
190         int errors;
191         unsigned int ref_type;
192         u64 dir;
193         u64 index;
194         u16 namelen;
        /* variable-length trailing storage for the name */
195         char name[0];
196 };
197
/*
 * List-linked record of values describing a root item.  Its users are not
 * in this part of the file -- presumably filled from a btrfs_root_item;
 * confirm there.
 */
198 struct root_item_record {
199         struct list_head list;
200         u64 objectid;
201         u64 bytenr;
202         u64 last_snapshot;
203         u8 level;
204         u8 drop_level;
205         int level_size;
206         struct btrfs_key drop_key;
207 };
208
/*
 * Error bits for a back reference (inode_backref.errors is OR-ed with them
 * in maybe_free_inode_rec()); print_ref_error() turns a mask into text.
 */
209 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
210 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
211 #define REF_ERR_NO_INODE_REF            (1 << 2)
212 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
213 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
214 #define REF_ERR_DUP_INODE_REF           (1 << 5)
215 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
216 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
217 #define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
218 #define REF_ERR_NO_ROOT_REF             (1 << 9)
219 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
220 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
221 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
222
/*
 * One hole in a file, kept in an rb-tree (inode_record.holes).
 * compare_hole_range() treats it as the half-open range
 * [start, start + len).
 */
223 struct file_extent_hole {
224         struct rb_node node;
225         u64 start;
226         u64 len;
227 };
228
229 /* Compatible function to allow reuse of old codes */
230 static u64 first_extent_gap(struct rb_root *holes)
231 {
232         struct file_extent_hole *hole;
233
234         if (RB_EMPTY_ROOT(holes))
235                 return (u64)-1;
236
237         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
238         return hole->start;
239 }
240
241 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
242 {
243         struct file_extent_hole *hole1;
244         struct file_extent_hole *hole2;
245
246         hole1 = rb_entry(node1, struct file_extent_hole, node);
247         hole2 = rb_entry(node2, struct file_extent_hole, node);
248
249         if (hole1->start > hole2->start)
250                 return -1;
251         if (hole1->start < hole2->start)
252                 return 1;
253         /* Now hole1->start == hole2->start */
254         if (hole1->len >= hole2->len)
255                 /*
256                  * Hole 1 will be merge center
257                  * Same hole will be merged later
258                  */
259                 return -1;
260         /* Hole 2 will be merge center */
261         return 1;
262 }
263
264 /*
265  * Add a hole to the record
266  *
267  * This will do hole merge for copy_file_extent_holes(),
268  * which will ensure there won't be continuous holes.
269  */
270 static int add_file_extent_hole(struct rb_root *holes,
271                                 u64 start, u64 len)
272 {
273         struct file_extent_hole *hole;
274         struct file_extent_hole *prev = NULL;
275         struct file_extent_hole *next = NULL;
276
277         hole = malloc(sizeof(*hole));
278         if (!hole)
279                 return -ENOMEM;
280         hole->start = start;
281         hole->len = len;
282         /* Since compare will not return 0, no -EEXIST will happen */
283         rb_insert(holes, &hole->node, compare_hole);
284
285         /* simple merge with previous hole */
286         if (rb_prev(&hole->node))
287                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
288                                 node);
289         if (prev && prev->start + prev->len >= hole->start) {
290                 hole->len = hole->start + hole->len - prev->start;
291                 hole->start = prev->start;
292                 rb_erase(&prev->node, holes);
293                 free(prev);
294                 prev = NULL;
295         }
296
297         /* iterate merge with next holes */
298         while (1) {
299                 if (!rb_next(&hole->node))
300                         break;
301                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
302                                         node);
303                 if (hole->start + hole->len >= next->start) {
304                         if (hole->start + hole->len <= next->start + next->len)
305                                 hole->len = next->start + next->len -
306                                             hole->start;
307                         rb_erase(&next->node, holes);
308                         free(next);
309                         next = NULL;
310                 } else
311                         break;
312         }
313         return 0;
314 }
315
316 static int compare_hole_range(struct rb_node *node, void *data)
317 {
318         struct file_extent_hole *hole;
319         u64 start;
320
321         hole = (struct file_extent_hole *)data;
322         start = hole->start;
323
324         hole = rb_entry(node, struct file_extent_hole, node);
325         if (start < hole->start)
326                 return -1;
327         if (start >= hole->start && start < hole->start + hole->len)
328                 return 0;
329         return 1;
330 }
331
332 /*
333  * Delete a hole in the record
334  *
335  * This will do the hole split and is much restrict than add.
336  */
337 static int del_file_extent_hole(struct rb_root *holes,
338                                 u64 start, u64 len)
339 {
340         struct file_extent_hole *hole;
341         struct file_extent_hole tmp;
342         u64 prev_start = 0;
343         u64 prev_len = 0;
344         u64 next_start = 0;
345         u64 next_len = 0;
346         struct rb_node *node;
347         int have_prev = 0;
348         int have_next = 0;
349         int ret = 0;
350
351         tmp.start = start;
352         tmp.len = len;
353         node = rb_search(holes, &tmp, compare_hole_range, NULL);
354         if (!node)
355                 return -EEXIST;
356         hole = rb_entry(node, struct file_extent_hole, node);
357         if (start + len > hole->start + hole->len)
358                 return -EEXIST;
359
360         /*
361          * Now there will be no overflap, delete the hole and re-add the
362          * split(s) if they exists.
363          */
364         if (start > hole->start) {
365                 prev_start = hole->start;
366                 prev_len = start - hole->start;
367                 have_prev = 1;
368         }
369         if (hole->start + hole->len > start + len) {
370                 next_start = start + len;
371                 next_len = hole->start + hole->len - start - len;
372                 have_next = 1;
373         }
374         rb_erase(node, holes);
375         free(hole);
376         if (have_prev) {
377                 ret = add_file_extent_hole(holes, prev_start, prev_len);
378                 if (ret < 0)
379                         return ret;
380         }
381         if (have_next) {
382                 ret = add_file_extent_hole(holes, next_start, next_len);
383                 if (ret < 0)
384                         return ret;
385         }
386         return 0;
387 }
388
389 static int copy_file_extent_holes(struct rb_root *dst,
390                                   struct rb_root *src)
391 {
392         struct file_extent_hole *hole;
393         struct rb_node *node;
394         int ret = 0;
395
396         node = rb_first(src);
397         while (node) {
398                 hole = rb_entry(node, struct file_extent_hole, node);
399                 ret = add_file_extent_hole(dst, hole->start, hole->len);
400                 if (ret)
401                         break;
402                 node = rb_next(node);
403         }
404         return ret;
405 }
406
407 static void free_file_extent_holes(struct rb_root *holes)
408 {
409         struct rb_node *node;
410         struct file_extent_hole *hole;
411
412         node = rb_first(holes);
413         while (node) {
414                 hole = rb_entry(node, struct file_extent_hole, node);
415                 rb_erase(node, holes);
416                 free(hole);
417                 node = rb_first(holes);
418         }
419 }
420
/*
 * Everything collected about one inode while checking a tree.  Records are
 * reference counted (refs): get_inode_rec() clones a record before
 * modification when refs > 1, and free_inode_rec() releases it when the
 * count drops to zero.
 */
421 struct inode_record {
        /* list of struct inode_backref */
422         struct list_head backrefs;
423         unsigned int checked:1;
424         unsigned int merging:1;
425         unsigned int found_inode_item:1;
426         unsigned int found_dir_item:1;
427         unsigned int found_file_extent:1;
428         unsigned int found_csum_item:1;
429         unsigned int some_csum_missing:1;
430         unsigned int nodatasum:1;
        /* I_ERR_* bits */
431         int errors;
432
        /* values copied from the inode item by process_inode_item() */
433         u64 ino;
434         u32 nlink;
435         u32 imode;
436         u64 isize;
437         u64 nbytes;
438
        /* values accumulated while scanning; compared with the above in
         * maybe_free_inode_rec() */
439         u32 found_link;
440         u64 found_size;
441         u64 extent_start;
442         u64 extent_end;
        /* rb-tree of struct file_extent_hole */
443         struct rb_root holes;
        /* list of struct orphan_data_extent */
444         struct list_head orphan_extents;
445
446         u32 refs;
447 };
448
/*
 * Error bits stored in inode_record.errors; print_inode_error() turns a
 * mask into text.
 */
449 #define I_ERR_NO_INODE_ITEM             (1 << 0)
450 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
451 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
452 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
453 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
454 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
455 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
456 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
457 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
458 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
459 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
460 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
461 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
462 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
463 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
464
/*
 * One named reference to a root; laid out like struct inode_backref with
 * the name stored inline after the fixed part.  Its users are outside this
 * view -- presumably linked on root_record.backrefs; confirm there.
 */
465 struct root_backref {
466         struct list_head list;
467         unsigned int found_dir_item:1;
468         unsigned int found_dir_index:1;
469         unsigned int found_back_ref:1;
470         unsigned int found_forward_ref:1;
471         unsigned int reachable:1;
472         int errors;
473         u64 ref_root;
474         u64 dir;
475         u64 index;
476         u16 namelen;
        /* variable-length trailing storage for the name */
477         char name[0];
478 };
479
/*
 * Per-root record with an embedded cache_extent and a list head for back
 * references.  Its users are outside this view.
 */
480 struct root_record {
481         struct list_head backrefs;
482         struct cache_extent cache;
483         unsigned int found_root_item:1;
484         u64 objectid;
485         u32 found_ref;
486 };
487
/*
 * Cache-tree entry holding an opaque pointer.  get_inode_rec() uses it to
 * map an inode number (cache.start = ino, cache.size = 1) to its
 * struct inode_record.
 */
488 struct ptr_node {
489         struct cache_extent cache;
490         void *data;
491 };
492
/*
 * Per-node container with its own root and inode caches.
 * process_inode_item() works on ->current and hands ->inode_cache to
 * maybe_free_inode_rec().
 */
493 struct shared_node {
494         struct cache_extent cache;
495         struct cache_tree root_cache;
496         struct cache_tree inode_cache;
        /* inode record currently being filled in */
497         struct inode_record *current;
498         u32 refs;
499 };
500
/* A (start, size) pair describing a block; users are outside this view. */
501 struct block_info {
502         u64 start;
503         u32 size;
504 };
505
/*
 * State kept while walking a tree: a cache tree plus one shared_node slot
 * per tree level (BTRFS_MAX_LEVEL entries).  Users are outside this view.
 */
506 struct walk_control {
507         struct cache_tree shared;
508         struct shared_node *nodes[BTRFS_MAX_LEVEL];
509         int active_node;
510         int root_level;
511 };
512
/* List entry identifying an item by key and root id; users are outside this view. */
513 struct bad_item {
514         struct btrfs_key key;
515         u64 root_id;
516         struct list_head list;
517 };
518
/* Forward declaration; the definition is not in this part of the file. */
519 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
520
521 static void record_root_in_trans(struct btrfs_trans_handle *trans,
522                                  struct btrfs_root *root)
523 {
524         if (root->last_trans != trans->transid) {
525                 root->track_dirty = 1;
526                 root->last_trans = trans->transid;
527                 root->commit_root = root->node;
528                 extent_buffer_get(root->node);
529         }
530 }
531
532 static u8 imode_to_type(u32 imode)
533 {
534 #define S_SHIFT 12
535         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
536                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
537                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
538                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
539                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
540                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
541                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
542                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
543         };
544
545         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
546 #undef S_SHIFT
547 }
548
549 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
550 {
551         struct device_record *rec1;
552         struct device_record *rec2;
553
554         rec1 = rb_entry(node1, struct device_record, node);
555         rec2 = rb_entry(node2, struct device_record, node);
556         if (rec1->devid > rec2->devid)
557                 return -1;
558         else if (rec1->devid < rec2->devid)
559                 return 1;
560         else
561                 return 0;
562 }
563
564 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
565 {
566         struct inode_record *rec;
567         struct inode_backref *backref;
568         struct inode_backref *orig;
569         struct inode_backref *tmp;
570         struct orphan_data_extent *src_orphan;
571         struct orphan_data_extent *dst_orphan;
572         size_t size;
573         int ret;
574
575         rec = malloc(sizeof(*rec));
576         if (!rec)
577                 return ERR_PTR(-ENOMEM);
578         memcpy(rec, orig_rec, sizeof(*rec));
579         rec->refs = 1;
580         INIT_LIST_HEAD(&rec->backrefs);
581         INIT_LIST_HEAD(&rec->orphan_extents);
582         rec->holes = RB_ROOT;
583
584         list_for_each_entry(orig, &orig_rec->backrefs, list) {
585                 size = sizeof(*orig) + orig->namelen + 1;
586                 backref = malloc(size);
587                 if (!backref) {
588                         ret = -ENOMEM;
589                         goto cleanup;
590                 }
591                 memcpy(backref, orig, size);
592                 list_add_tail(&backref->list, &rec->backrefs);
593         }
594         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
595                 dst_orphan = malloc(sizeof(*dst_orphan));
596                 if (!dst_orphan) {
597                         ret = -ENOMEM;
598                         goto cleanup;
599                 }
600                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
601                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
602         }
603         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
604         BUG_ON(ret < 0);
605
606         return rec;
607
608 cleanup:
609         if (!list_empty(&rec->backrefs))
610                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
611                         list_del(&orig->list);
612                         free(orig);
613                 }
614
615         if (!list_empty(&rec->orphan_extents))
616                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
617                         list_del(&orig->list);
618                         free(orig);
619                 }
620
621         free(rec);
622
623         return ERR_PTR(ret);
624 }
625
626 static void print_orphan_data_extents(struct list_head *orphan_extents,
627                                       u64 objectid)
628 {
629         struct orphan_data_extent *orphan;
630
631         if (list_empty(orphan_extents))
632                 return;
633         printf("The following data extent is lost in tree %llu:\n",
634                objectid);
635         list_for_each_entry(orphan, orphan_extents, list) {
636                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
637                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
638                        orphan->disk_len);
639         }
640 }
641
642 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
643 {
644         u64 root_objectid = root->root_key.objectid;
645         int errors = rec->errors;
646
647         if (!errors)
648                 return;
649         /* reloc root errors, we print its corresponding fs root objectid*/
650         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
651                 root_objectid = root->root_key.offset;
652                 fprintf(stderr, "reloc");
653         }
654         fprintf(stderr, "root %llu inode %llu errors %x",
655                 (unsigned long long) root_objectid,
656                 (unsigned long long) rec->ino, rec->errors);
657
658         if (errors & I_ERR_NO_INODE_ITEM)
659                 fprintf(stderr, ", no inode item");
660         if (errors & I_ERR_NO_ORPHAN_ITEM)
661                 fprintf(stderr, ", no orphan item");
662         if (errors & I_ERR_DUP_INODE_ITEM)
663                 fprintf(stderr, ", dup inode item");
664         if (errors & I_ERR_DUP_DIR_INDEX)
665                 fprintf(stderr, ", dup dir index");
666         if (errors & I_ERR_ODD_DIR_ITEM)
667                 fprintf(stderr, ", odd dir item");
668         if (errors & I_ERR_ODD_FILE_EXTENT)
669                 fprintf(stderr, ", odd file extent");
670         if (errors & I_ERR_BAD_FILE_EXTENT)
671                 fprintf(stderr, ", bad file extent");
672         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
673                 fprintf(stderr, ", file extent overlap");
674         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
675                 fprintf(stderr, ", file extent discount");
676         if (errors & I_ERR_DIR_ISIZE_WRONG)
677                 fprintf(stderr, ", dir isize wrong");
678         if (errors & I_ERR_FILE_NBYTES_WRONG)
679                 fprintf(stderr, ", nbytes wrong");
680         if (errors & I_ERR_ODD_CSUM_ITEM)
681                 fprintf(stderr, ", odd csum item");
682         if (errors & I_ERR_SOME_CSUM_MISSING)
683                 fprintf(stderr, ", some csum missing");
684         if (errors & I_ERR_LINK_COUNT_WRONG)
685                 fprintf(stderr, ", link count wrong");
686         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
687                 fprintf(stderr, ", orphan file extent");
688         fprintf(stderr, "\n");
689         /* Print the orphan extents if needed */
690         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
691                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
692
693         /* Print the holes if needed */
694         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
695                 struct file_extent_hole *hole;
696                 struct rb_node *node;
697                 int found = 0;
698
699                 node = rb_first(&rec->holes);
700                 fprintf(stderr, "Found file extent holes:\n");
701                 while (node) {
702                         found = 1;
703                         hole = rb_entry(node, struct file_extent_hole, node);
704                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
705                                 hole->start, hole->len);
706                         node = rb_next(node);
707                 }
708                 if (!found)
709                         fprintf(stderr, "\tstart: 0, len: %llu\n",
710                                 round_up(rec->isize, root->sectorsize));
711         }
712 }
713
714 static void print_ref_error(int errors)
715 {
716         if (errors & REF_ERR_NO_DIR_ITEM)
717                 fprintf(stderr, ", no dir item");
718         if (errors & REF_ERR_NO_DIR_INDEX)
719                 fprintf(stderr, ", no dir index");
720         if (errors & REF_ERR_NO_INODE_REF)
721                 fprintf(stderr, ", no inode ref");
722         if (errors & REF_ERR_DUP_DIR_ITEM)
723                 fprintf(stderr, ", dup dir item");
724         if (errors & REF_ERR_DUP_DIR_INDEX)
725                 fprintf(stderr, ", dup dir index");
726         if (errors & REF_ERR_DUP_INODE_REF)
727                 fprintf(stderr, ", dup inode ref");
728         if (errors & REF_ERR_INDEX_UNMATCH)
729                 fprintf(stderr, ", index unmatch");
730         if (errors & REF_ERR_FILETYPE_UNMATCH)
731                 fprintf(stderr, ", filetype unmatch");
732         if (errors & REF_ERR_NAME_TOO_LONG)
733                 fprintf(stderr, ", name too long");
734         if (errors & REF_ERR_NO_ROOT_REF)
735                 fprintf(stderr, ", no root ref");
736         if (errors & REF_ERR_NO_ROOT_BACKREF)
737                 fprintf(stderr, ", no root backref");
738         if (errors & REF_ERR_DUP_ROOT_REF)
739                 fprintf(stderr, ", dup root ref");
740         if (errors & REF_ERR_DUP_ROOT_BACKREF)
741                 fprintf(stderr, ", dup root backref");
742         fprintf(stderr, "\n");
743 }
744
745 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
746                                           u64 ino, int mod)
747 {
748         struct ptr_node *node;
749         struct cache_extent *cache;
750         struct inode_record *rec = NULL;
751         int ret;
752
753         cache = lookup_cache_extent(inode_cache, ino, 1);
754         if (cache) {
755                 node = container_of(cache, struct ptr_node, cache);
756                 rec = node->data;
757                 if (mod && rec->refs > 1) {
758                         node->data = clone_inode_rec(rec);
759                         if (IS_ERR(node->data))
760                                 return node->data;
761                         rec->refs--;
762                         rec = node->data;
763                 }
764         } else if (mod) {
765                 rec = calloc(1, sizeof(*rec));
766                 if (!rec)
767                         return ERR_PTR(-ENOMEM);
768                 rec->ino = ino;
769                 rec->extent_start = (u64)-1;
770                 rec->refs = 1;
771                 INIT_LIST_HEAD(&rec->backrefs);
772                 INIT_LIST_HEAD(&rec->orphan_extents);
773                 rec->holes = RB_ROOT;
774
775                 node = malloc(sizeof(*node));
776                 if (!node) {
777                         free(rec);
778                         return ERR_PTR(-ENOMEM);
779                 }
780                 node->cache.start = ino;
781                 node->cache.size = 1;
782                 node->data = rec;
783
784                 if (ino == BTRFS_FREE_INO_OBJECTID)
785                         rec->found_link = 1;
786
787                 ret = insert_cache_extent(inode_cache, &node->cache);
788                 if (ret)
789                         return ERR_PTR(-EEXIST);
790         }
791         return rec;
792 }
793
794 static void free_orphan_data_extents(struct list_head *orphan_extents)
795 {
796         struct orphan_data_extent *orphan;
797
798         while (!list_empty(orphan_extents)) {
799                 orphan = list_entry(orphan_extents->next,
800                                     struct orphan_data_extent, list);
801                 list_del(&orphan->list);
802                 free(orphan);
803         }
804 }
805
806 static void free_inode_rec(struct inode_record *rec)
807 {
808         struct inode_backref *backref;
809
810         if (--rec->refs > 0)
811                 return;
812
813         while (!list_empty(&rec->backrefs)) {
814                 backref = list_entry(rec->backrefs.next,
815                                      struct inode_backref, list);
816                 list_del(&backref->list);
817                 free(backref);
818         }
819         free_orphan_data_extents(&rec->orphan_extents);
820         free_file_extent_holes(&rec->holes);
821         free(rec);
822 }
823
824 static int can_free_inode_rec(struct inode_record *rec)
825 {
826         if (!rec->errors && rec->checked && rec->found_inode_item &&
827             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
828                 return 1;
829         return 0;
830 }
831
832 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
833                                  struct inode_record *rec)
834 {
835         struct cache_extent *cache;
836         struct inode_backref *tmp, *backref;
837         struct ptr_node *node;
838         unsigned char filetype;
839
840         if (!rec->found_inode_item)
841                 return;
842
843         filetype = imode_to_type(rec->imode);
844         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
845                 if (backref->found_dir_item && backref->found_dir_index) {
846                         if (backref->filetype != filetype)
847                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
848                         if (!backref->errors && backref->found_inode_ref &&
849                             rec->nlink == rec->found_link) {
850                                 list_del(&backref->list);
851                                 free(backref);
852                         }
853                 }
854         }
855
856         if (!rec->checked || rec->merging)
857                 return;
858
859         if (S_ISDIR(rec->imode)) {
860                 if (rec->found_size != rec->isize)
861                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
862                 if (rec->found_file_extent)
863                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
864         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
865                 if (rec->found_dir_item)
866                         rec->errors |= I_ERR_ODD_DIR_ITEM;
867                 if (rec->found_size != rec->nbytes)
868                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
869                 if (rec->nlink > 0 && !no_holes &&
870                     (rec->extent_end < rec->isize ||
871                      first_extent_gap(&rec->holes) < rec->isize))
872                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
873         }
874
875         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
876                 if (rec->found_csum_item && rec->nodatasum)
877                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
878                 if (rec->some_csum_missing && !rec->nodatasum)
879                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
880         }
881
882         BUG_ON(rec->refs != 1);
883         if (can_free_inode_rec(rec)) {
884                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
885                 node = container_of(cache, struct ptr_node, cache);
886                 BUG_ON(node->data != rec);
887                 remove_cache_extent(inode_cache, &node->cache);
888                 free(node);
889                 free_inode_rec(rec);
890         }
891 }
892
893 static int check_orphan_item(struct btrfs_root *root, u64 ino)
894 {
895         struct btrfs_path path;
896         struct btrfs_key key;
897         int ret;
898
899         key.objectid = BTRFS_ORPHAN_OBJECTID;
900         key.type = BTRFS_ORPHAN_ITEM_KEY;
901         key.offset = ino;
902
903         btrfs_init_path(&path);
904         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
905         btrfs_release_path(&path);
906         if (ret > 0)
907                 ret = -ENOENT;
908         return ret;
909 }
910
911 static int process_inode_item(struct extent_buffer *eb,
912                               int slot, struct btrfs_key *key,
913                               struct shared_node *active_node)
914 {
915         struct inode_record *rec;
916         struct btrfs_inode_item *item;
917
918         rec = active_node->current;
919         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
920         if (rec->found_inode_item) {
921                 rec->errors |= I_ERR_DUP_INODE_ITEM;
922                 return 1;
923         }
924         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
925         rec->nlink = btrfs_inode_nlink(eb, item);
926         rec->isize = btrfs_inode_size(eb, item);
927         rec->nbytes = btrfs_inode_nbytes(eb, item);
928         rec->imode = btrfs_inode_mode(eb, item);
929         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
930                 rec->nodatasum = 1;
931         rec->found_inode_item = 1;
932         if (rec->nlink == 0)
933                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
934         maybe_free_inode_rec(&active_node->inode_cache, rec);
935         return 0;
936 }
937
938 static struct inode_backref *get_inode_backref(struct inode_record *rec,
939                                                 const char *name,
940                                                 int namelen, u64 dir)
941 {
942         struct inode_backref *backref;
943
944         list_for_each_entry(backref, &rec->backrefs, list) {
945                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
946                         break;
947                 if (backref->dir != dir || backref->namelen != namelen)
948                         continue;
949                 if (memcmp(name, backref->name, namelen))
950                         continue;
951                 return backref;
952         }
953
954         backref = malloc(sizeof(*backref) + namelen + 1);
955         if (!backref)
956                 return NULL;
957         memset(backref, 0, sizeof(*backref));
958         backref->dir = dir;
959         backref->namelen = namelen;
960         memcpy(backref->name, name, namelen);
961         backref->name[namelen] = '\0';
962         list_add_tail(&backref->list, &rec->backrefs);
963         return backref;
964 }
965
966 static int add_inode_backref(struct cache_tree *inode_cache,
967                              u64 ino, u64 dir, u64 index,
968                              const char *name, int namelen,
969                              int filetype, int itemtype, int errors)
970 {
971         struct inode_record *rec;
972         struct inode_backref *backref;
973
974         rec = get_inode_rec(inode_cache, ino, 1);
975         BUG_ON(IS_ERR(rec));
976         backref = get_inode_backref(rec, name, namelen, dir);
977         BUG_ON(!backref);
978         if (errors)
979                 backref->errors |= errors;
980         if (itemtype == BTRFS_DIR_INDEX_KEY) {
981                 if (backref->found_dir_index)
982                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
983                 if (backref->found_inode_ref && backref->index != index)
984                         backref->errors |= REF_ERR_INDEX_UNMATCH;
985                 if (backref->found_dir_item && backref->filetype != filetype)
986                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
987
988                 backref->index = index;
989                 backref->filetype = filetype;
990                 backref->found_dir_index = 1;
991         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
992                 rec->found_link++;
993                 if (backref->found_dir_item)
994                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
995                 if (backref->found_dir_index && backref->filetype != filetype)
996                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
997
998                 backref->filetype = filetype;
999                 backref->found_dir_item = 1;
1000         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1001                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1002                 if (backref->found_inode_ref)
1003                         backref->errors |= REF_ERR_DUP_INODE_REF;
1004                 if (backref->found_dir_index && backref->index != index)
1005                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1006                 else
1007                         backref->index = index;
1008
1009                 backref->ref_type = itemtype;
1010                 backref->found_inode_ref = 1;
1011         } else {
1012                 BUG_ON(1);
1013         }
1014
1015         maybe_free_inode_rec(inode_cache, rec);
1016         return 0;
1017 }
1018
1019 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1020                             struct cache_tree *dst_cache)
1021 {
1022         struct inode_backref *backref;
1023         u32 dir_count = 0;
1024         int ret = 0;
1025
1026         dst->merging = 1;
1027         list_for_each_entry(backref, &src->backrefs, list) {
1028                 if (backref->found_dir_index) {
1029                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1030                                         backref->index, backref->name,
1031                                         backref->namelen, backref->filetype,
1032                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1033                 }
1034                 if (backref->found_dir_item) {
1035                         dir_count++;
1036                         add_inode_backref(dst_cache, dst->ino,
1037                                         backref->dir, 0, backref->name,
1038                                         backref->namelen, backref->filetype,
1039                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1040                 }
1041                 if (backref->found_inode_ref) {
1042                         add_inode_backref(dst_cache, dst->ino,
1043                                         backref->dir, backref->index,
1044                                         backref->name, backref->namelen, 0,
1045                                         backref->ref_type, backref->errors);
1046                 }
1047         }
1048
1049         if (src->found_dir_item)
1050                 dst->found_dir_item = 1;
1051         if (src->found_file_extent)
1052                 dst->found_file_extent = 1;
1053         if (src->found_csum_item)
1054                 dst->found_csum_item = 1;
1055         if (src->some_csum_missing)
1056                 dst->some_csum_missing = 1;
1057         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1058                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1059                 if (ret < 0)
1060                         return ret;
1061         }
1062
1063         BUG_ON(src->found_link < dir_count);
1064         dst->found_link += src->found_link - dir_count;
1065         dst->found_size += src->found_size;
1066         if (src->extent_start != (u64)-1) {
1067                 if (dst->extent_start == (u64)-1) {
1068                         dst->extent_start = src->extent_start;
1069                         dst->extent_end = src->extent_end;
1070                 } else {
1071                         if (dst->extent_end > src->extent_start)
1072                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1073                         else if (dst->extent_end < src->extent_start) {
1074                                 ret = add_file_extent_hole(&dst->holes,
1075                                         dst->extent_end,
1076                                         src->extent_start - dst->extent_end);
1077                         }
1078                         if (dst->extent_end < src->extent_end)
1079                                 dst->extent_end = src->extent_end;
1080                 }
1081         }
1082
1083         dst->errors |= src->errors;
1084         if (src->found_inode_item) {
1085                 if (!dst->found_inode_item) {
1086                         dst->nlink = src->nlink;
1087                         dst->isize = src->isize;
1088                         dst->nbytes = src->nbytes;
1089                         dst->imode = src->imode;
1090                         dst->nodatasum = src->nodatasum;
1091                         dst->found_inode_item = 1;
1092                 } else {
1093                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1094                 }
1095         }
1096         dst->merging = 0;
1097
1098         return 0;
1099 }
1100
1101 static int splice_shared_node(struct shared_node *src_node,
1102                               struct shared_node *dst_node)
1103 {
1104         struct cache_extent *cache;
1105         struct ptr_node *node, *ins;
1106         struct cache_tree *src, *dst;
1107         struct inode_record *rec, *conflict;
1108         u64 current_ino = 0;
1109         int splice = 0;
1110         int ret;
1111
1112         if (--src_node->refs == 0)
1113                 splice = 1;
1114         if (src_node->current)
1115                 current_ino = src_node->current->ino;
1116
1117         src = &src_node->root_cache;
1118         dst = &dst_node->root_cache;
1119 again:
1120         cache = search_cache_extent(src, 0);
1121         while (cache) {
1122                 node = container_of(cache, struct ptr_node, cache);
1123                 rec = node->data;
1124                 cache = next_cache_extent(cache);
1125
1126                 if (splice) {
1127                         remove_cache_extent(src, &node->cache);
1128                         ins = node;
1129                 } else {
1130                         ins = malloc(sizeof(*ins));
1131                         BUG_ON(!ins);
1132                         ins->cache.start = node->cache.start;
1133                         ins->cache.size = node->cache.size;
1134                         ins->data = rec;
1135                         rec->refs++;
1136                 }
1137                 ret = insert_cache_extent(dst, &ins->cache);
1138                 if (ret == -EEXIST) {
1139                         conflict = get_inode_rec(dst, rec->ino, 1);
1140                         BUG_ON(IS_ERR(conflict));
1141                         merge_inode_recs(rec, conflict, dst);
1142                         if (rec->checked) {
1143                                 conflict->checked = 1;
1144                                 if (dst_node->current == conflict)
1145                                         dst_node->current = NULL;
1146                         }
1147                         maybe_free_inode_rec(dst, conflict);
1148                         free_inode_rec(rec);
1149                         free(ins);
1150                 } else {
1151                         BUG_ON(ret);
1152                 }
1153         }
1154
1155         if (src == &src_node->root_cache) {
1156                 src = &src_node->inode_cache;
1157                 dst = &dst_node->inode_cache;
1158                 goto again;
1159         }
1160
1161         if (current_ino > 0 && (!dst_node->current ||
1162             current_ino > dst_node->current->ino)) {
1163                 if (dst_node->current) {
1164                         dst_node->current->checked = 1;
1165                         maybe_free_inode_rec(dst, dst_node->current);
1166                 }
1167                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1168                 BUG_ON(IS_ERR(dst_node->current));
1169         }
1170         return 0;
1171 }
1172
1173 static void free_inode_ptr(struct cache_extent *cache)
1174 {
1175         struct ptr_node *node;
1176         struct inode_record *rec;
1177
1178         node = container_of(cache, struct ptr_node, cache);
1179         rec = node->data;
1180         free_inode_rec(rec);
1181         free(node);
1182 }
1183
1184 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1185
1186 static struct shared_node *find_shared_node(struct cache_tree *shared,
1187                                             u64 bytenr)
1188 {
1189         struct cache_extent *cache;
1190         struct shared_node *node;
1191
1192         cache = lookup_cache_extent(shared, bytenr, 1);
1193         if (cache) {
1194                 node = container_of(cache, struct shared_node, cache);
1195                 return node;
1196         }
1197         return NULL;
1198 }
1199
1200 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1201 {
1202         int ret;
1203         struct shared_node *node;
1204
1205         node = calloc(1, sizeof(*node));
1206         if (!node)
1207                 return -ENOMEM;
1208         node->cache.start = bytenr;
1209         node->cache.size = 1;
1210         cache_tree_init(&node->root_cache);
1211         cache_tree_init(&node->inode_cache);
1212         node->refs = refs;
1213
1214         ret = insert_cache_extent(shared, &node->cache);
1215
1216         return ret;
1217 }
1218
1219 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1220                              struct walk_control *wc, int level)
1221 {
1222         struct shared_node *node;
1223         struct shared_node *dest;
1224         int ret;
1225
1226         if (level == wc->active_node)
1227                 return 0;
1228
1229         BUG_ON(wc->active_node <= level);
1230         node = find_shared_node(&wc->shared, bytenr);
1231         if (!node) {
1232                 ret = add_shared_node(&wc->shared, bytenr, refs);
1233                 BUG_ON(ret);
1234                 node = find_shared_node(&wc->shared, bytenr);
1235                 wc->nodes[level] = node;
1236                 wc->active_node = level;
1237                 return 0;
1238         }
1239
1240         if (wc->root_level == wc->active_node &&
1241             btrfs_root_refs(&root->root_item) == 0) {
1242                 if (--node->refs == 0) {
1243                         free_inode_recs_tree(&node->root_cache);
1244                         free_inode_recs_tree(&node->inode_cache);
1245                         remove_cache_extent(&wc->shared, &node->cache);
1246                         free(node);
1247                 }
1248                 return 1;
1249         }
1250
1251         dest = wc->nodes[wc->active_node];
1252         splice_shared_node(node, dest);
1253         if (node->refs == 0) {
1254                 remove_cache_extent(&wc->shared, &node->cache);
1255                 free(node);
1256         }
1257         return 1;
1258 }
1259
1260 static int leave_shared_node(struct btrfs_root *root,
1261                              struct walk_control *wc, int level)
1262 {
1263         struct shared_node *node;
1264         struct shared_node *dest;
1265         int i;
1266
1267         if (level == wc->root_level)
1268                 return 0;
1269
1270         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1271                 if (wc->nodes[i])
1272                         break;
1273         }
1274         BUG_ON(i >= BTRFS_MAX_LEVEL);
1275
1276         node = wc->nodes[wc->active_node];
1277         wc->nodes[wc->active_node] = NULL;
1278         wc->active_node = i;
1279
1280         dest = wc->nodes[wc->active_node];
1281         if (wc->active_node < wc->root_level ||
1282             btrfs_root_refs(&root->root_item) > 0) {
1283                 BUG_ON(node->refs <= 1);
1284                 splice_shared_node(node, dest);
1285         } else {
1286                 BUG_ON(node->refs < 2);
1287                 node->refs--;
1288         }
1289         return 0;
1290 }
1291
1292 /*
1293  * Returns:
1294  * < 0 - on error
1295  * 1   - if the root with id child_root_id is a child of root parent_root_id
1296  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1297  *       has other root(s) as parent(s)
1298  * 2   - if the root child_root_id doesn't have any parent roots
1299  */
1300 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1301                          u64 child_root_id)
1302 {
1303         struct btrfs_path path;
1304         struct btrfs_key key;
1305         struct extent_buffer *leaf;
1306         int has_parent = 0;
1307         int ret;
1308
1309         btrfs_init_path(&path);
1310
1311         key.objectid = parent_root_id;
1312         key.type = BTRFS_ROOT_REF_KEY;
1313         key.offset = child_root_id;
1314         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1315                                 0, 0);
1316         if (ret < 0)
1317                 return ret;
1318         btrfs_release_path(&path);
1319         if (!ret)
1320                 return 1;
1321
1322         key.objectid = child_root_id;
1323         key.type = BTRFS_ROOT_BACKREF_KEY;
1324         key.offset = 0;
1325         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1326                                 0, 0);
1327         if (ret < 0)
1328                 goto out;
1329
1330         while (1) {
1331                 leaf = path.nodes[0];
1332                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1333                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1334                         if (ret)
1335                                 break;
1336                         leaf = path.nodes[0];
1337                 }
1338
1339                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1340                 if (key.objectid != child_root_id ||
1341                     key.type != BTRFS_ROOT_BACKREF_KEY)
1342                         break;
1343
1344                 has_parent = 1;
1345
1346                 if (key.offset == parent_root_id) {
1347                         btrfs_release_path(&path);
1348                         return 1;
1349                 }
1350
1351                 path.slots[0]++;
1352         }
1353 out:
1354         btrfs_release_path(&path);
1355         if (ret < 0)
1356                 return ret;
1357         return has_parent ? 0 : 2;
1358 }
1359
1360 static int process_dir_item(struct btrfs_root *root,
1361                             struct extent_buffer *eb,
1362                             int slot, struct btrfs_key *key,
1363                             struct shared_node *active_node)
1364 {
1365         u32 total;
1366         u32 cur = 0;
1367         u32 len;
1368         u32 name_len;
1369         u32 data_len;
1370         int error;
1371         int nritems = 0;
1372         int filetype;
1373         struct btrfs_dir_item *di;
1374         struct inode_record *rec;
1375         struct cache_tree *root_cache;
1376         struct cache_tree *inode_cache;
1377         struct btrfs_key location;
1378         char namebuf[BTRFS_NAME_LEN];
1379
1380         root_cache = &active_node->root_cache;
1381         inode_cache = &active_node->inode_cache;
1382         rec = active_node->current;
1383         rec->found_dir_item = 1;
1384
1385         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1386         total = btrfs_item_size_nr(eb, slot);
1387         while (cur < total) {
1388                 nritems++;
1389                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1390                 name_len = btrfs_dir_name_len(eb, di);
1391                 data_len = btrfs_dir_data_len(eb, di);
1392                 filetype = btrfs_dir_type(eb, di);
1393
1394                 rec->found_size += name_len;
1395                 if (name_len <= BTRFS_NAME_LEN) {
1396                         len = name_len;
1397                         error = 0;
1398                 } else {
1399                         len = BTRFS_NAME_LEN;
1400                         error = REF_ERR_NAME_TOO_LONG;
1401                 }
1402                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1403
1404                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1405                         add_inode_backref(inode_cache, location.objectid,
1406                                           key->objectid, key->offset, namebuf,
1407                                           len, filetype, key->type, error);
1408                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1409                         add_inode_backref(root_cache, location.objectid,
1410                                           key->objectid, key->offset,
1411                                           namebuf, len, filetype,
1412                                           key->type, error);
1413                 } else {
1414                         fprintf(stderr, "invalid location in dir item %u\n",
1415                                 location.type);
1416                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1417                                           key->objectid, key->offset, namebuf,
1418                                           len, filetype, key->type, error);
1419                 }
1420
1421                 len = sizeof(*di) + name_len + data_len;
1422                 di = (struct btrfs_dir_item *)((char *)di + len);
1423                 cur += len;
1424         }
1425         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1426                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1427
1428         return 0;
1429 }
1430
1431 static int process_inode_ref(struct extent_buffer *eb,
1432                              int slot, struct btrfs_key *key,
1433                              struct shared_node *active_node)
1434 {
1435         u32 total;
1436         u32 cur = 0;
1437         u32 len;
1438         u32 name_len;
1439         u64 index;
1440         int error;
1441         struct cache_tree *inode_cache;
1442         struct btrfs_inode_ref *ref;
1443         char namebuf[BTRFS_NAME_LEN];
1444
1445         inode_cache = &active_node->inode_cache;
1446
1447         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1448         total = btrfs_item_size_nr(eb, slot);
1449         while (cur < total) {
1450                 name_len = btrfs_inode_ref_name_len(eb, ref);
1451                 index = btrfs_inode_ref_index(eb, ref);
1452                 if (name_len <= BTRFS_NAME_LEN) {
1453                         len = name_len;
1454                         error = 0;
1455                 } else {
1456                         len = BTRFS_NAME_LEN;
1457                         error = REF_ERR_NAME_TOO_LONG;
1458                 }
1459                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1460                 add_inode_backref(inode_cache, key->objectid, key->offset,
1461                                   index, namebuf, len, 0, key->type, error);
1462
1463                 len = sizeof(*ref) + name_len;
1464                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1465                 cur += len;
1466         }
1467         return 0;
1468 }
1469
1470 static int process_inode_extref(struct extent_buffer *eb,
1471                                 int slot, struct btrfs_key *key,
1472                                 struct shared_node *active_node)
1473 {
1474         u32 total;
1475         u32 cur = 0;
1476         u32 len;
1477         u32 name_len;
1478         u64 index;
1479         u64 parent;
1480         int error;
1481         struct cache_tree *inode_cache;
1482         struct btrfs_inode_extref *extref;
1483         char namebuf[BTRFS_NAME_LEN];
1484
1485         inode_cache = &active_node->inode_cache;
1486
1487         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1488         total = btrfs_item_size_nr(eb, slot);
1489         while (cur < total) {
1490                 name_len = btrfs_inode_extref_name_len(eb, extref);
1491                 index = btrfs_inode_extref_index(eb, extref);
1492                 parent = btrfs_inode_extref_parent(eb, extref);
1493                 if (name_len <= BTRFS_NAME_LEN) {
1494                         len = name_len;
1495                         error = 0;
1496                 } else {
1497                         len = BTRFS_NAME_LEN;
1498                         error = REF_ERR_NAME_TOO_LONG;
1499                 }
1500                 read_extent_buffer(eb, namebuf,
1501                                    (unsigned long)(extref + 1), len);
1502                 add_inode_backref(inode_cache, key->objectid, parent,
1503                                   index, namebuf, len, 0, key->type, error);
1504
1505                 len = sizeof(*extref) + name_len;
1506                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1507                 cur += len;
1508         }
1509         return 0;
1510
1511 }
1512
1513 static int count_csum_range(struct btrfs_root *root, u64 start,
1514                             u64 len, u64 *found)
1515 {
1516         struct btrfs_key key;
1517         struct btrfs_path path;
1518         struct extent_buffer *leaf;
1519         int ret;
1520         size_t size;
1521         *found = 0;
1522         u64 csum_end;
1523         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1524
1525         btrfs_init_path(&path);
1526
1527         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1528         key.offset = start;
1529         key.type = BTRFS_EXTENT_CSUM_KEY;
1530
1531         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1532                                 &key, &path, 0, 0);
1533         if (ret < 0)
1534                 goto out;
1535         if (ret > 0 && path.slots[0] > 0) {
1536                 leaf = path.nodes[0];
1537                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1538                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1539                     key.type == BTRFS_EXTENT_CSUM_KEY)
1540                         path.slots[0]--;
1541         }
1542
1543         while (len > 0) {
1544                 leaf = path.nodes[0];
1545                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1546                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1547                         if (ret > 0)
1548                                 break;
1549                         else if (ret < 0)
1550                                 goto out;
1551                         leaf = path.nodes[0];
1552                 }
1553
1554                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1555                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1556                     key.type != BTRFS_EXTENT_CSUM_KEY)
1557                         break;
1558
1559                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1560                 if (key.offset >= start + len)
1561                         break;
1562
1563                 if (key.offset > start)
1564                         start = key.offset;
1565
1566                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1567                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1568                 if (csum_end > start) {
1569                         size = min(csum_end - start, len);
1570                         len -= size;
1571                         start += size;
1572                         *found += size;
1573                 }
1574
1575                 path.slots[0]++;
1576         }
1577 out:
1578         btrfs_release_path(&path);
1579         if (ret < 0)
1580                 return ret;
1581         return 0;
1582 }
1583
1584 static int process_file_extent(struct btrfs_root *root,
1585                                 struct extent_buffer *eb,
1586                                 int slot, struct btrfs_key *key,
1587                                 struct shared_node *active_node)
1588 {
1589         struct inode_record *rec;
1590         struct btrfs_file_extent_item *fi;
1591         u64 num_bytes = 0;
1592         u64 disk_bytenr = 0;
1593         u64 extent_offset = 0;
1594         u64 mask = root->sectorsize - 1;
1595         int extent_type;
1596         int ret;
1597
1598         rec = active_node->current;
1599         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1600         rec->found_file_extent = 1;
1601
1602         if (rec->extent_start == (u64)-1) {
1603                 rec->extent_start = key->offset;
1604                 rec->extent_end = key->offset;
1605         }
1606
1607         if (rec->extent_end > key->offset)
1608                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1609         else if (rec->extent_end < key->offset) {
1610                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1611                                            key->offset - rec->extent_end);
1612                 if (ret < 0)
1613                         return ret;
1614         }
1615
1616         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1617         extent_type = btrfs_file_extent_type(eb, fi);
1618
1619         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1620                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1621                 if (num_bytes == 0)
1622                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1623                 rec->found_size += num_bytes;
1624                 num_bytes = (num_bytes + mask) & ~mask;
1625         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1626                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1627                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1628                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1629                 extent_offset = btrfs_file_extent_offset(eb, fi);
1630                 if (num_bytes == 0 || (num_bytes & mask))
1631                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1632                 if (num_bytes + extent_offset >
1633                     btrfs_file_extent_ram_bytes(eb, fi))
1634                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1635                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1636                     (btrfs_file_extent_compression(eb, fi) ||
1637                      btrfs_file_extent_encryption(eb, fi) ||
1638                      btrfs_file_extent_other_encoding(eb, fi)))
1639                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1640                 if (disk_bytenr > 0)
1641                         rec->found_size += num_bytes;
1642         } else {
1643                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1644         }
1645         rec->extent_end = key->offset + num_bytes;
1646
1647         /*
1648          * The data reloc tree will copy full extents into its inode and then
1649          * copy the corresponding csums.  Because the extent it copied could be
1650          * a preallocated extent that hasn't been written to yet there may be no
1651          * csums to copy, ergo we won't have csums for our file extent.  This is
1652          * ok so just don't bother checking csums if the inode belongs to the
1653          * data reloc tree.
1654          */
1655         if (disk_bytenr > 0 &&
1656             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1657                 u64 found;
1658                 if (btrfs_file_extent_compression(eb, fi))
1659                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1660                 else
1661                         disk_bytenr += extent_offset;
1662
1663                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1664                 if (ret < 0)
1665                         return ret;
1666                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1667                         if (found > 0)
1668                                 rec->found_csum_item = 1;
1669                         if (found < num_bytes)
1670                                 rec->some_csum_missing = 1;
1671                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1672                         if (found > 0)
1673                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1674                 }
1675         }
1676         return 0;
1677 }
1678
1679 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1680                             struct walk_control *wc)
1681 {
1682         struct btrfs_key key;
1683         u32 nritems;
1684         int i;
1685         int ret = 0;
1686         struct cache_tree *inode_cache;
1687         struct shared_node *active_node;
1688
1689         if (wc->root_level == wc->active_node &&
1690             btrfs_root_refs(&root->root_item) == 0)
1691                 return 0;
1692
1693         active_node = wc->nodes[wc->active_node];
1694         inode_cache = &active_node->inode_cache;
1695         nritems = btrfs_header_nritems(eb);
1696         for (i = 0; i < nritems; i++) {
1697                 btrfs_item_key_to_cpu(eb, &key, i);
1698
1699                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1700                         continue;
1701                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1702                         continue;
1703
1704                 if (active_node->current == NULL ||
1705                     active_node->current->ino < key.objectid) {
1706                         if (active_node->current) {
1707                                 active_node->current->checked = 1;
1708                                 maybe_free_inode_rec(inode_cache,
1709                                                      active_node->current);
1710                         }
1711                         active_node->current = get_inode_rec(inode_cache,
1712                                                              key.objectid, 1);
1713                         BUG_ON(IS_ERR(active_node->current));
1714                 }
1715                 switch (key.type) {
1716                 case BTRFS_DIR_ITEM_KEY:
1717                 case BTRFS_DIR_INDEX_KEY:
1718                         ret = process_dir_item(root, eb, i, &key, active_node);
1719                         break;
1720                 case BTRFS_INODE_REF_KEY:
1721                         ret = process_inode_ref(eb, i, &key, active_node);
1722                         break;
1723                 case BTRFS_INODE_EXTREF_KEY:
1724                         ret = process_inode_extref(eb, i, &key, active_node);
1725                         break;
1726                 case BTRFS_INODE_ITEM_KEY:
1727                         ret = process_inode_item(eb, i, &key, active_node);
1728                         break;
1729                 case BTRFS_EXTENT_DATA_KEY:
1730                         ret = process_file_extent(root, eb, i, &key,
1731                                                   active_node);
1732                         break;
1733                 default:
1734                         break;
1735                 };
1736         }
1737         return ret;
1738 }
1739
1740 static void reada_walk_down(struct btrfs_root *root,
1741                             struct extent_buffer *node, int slot)
1742 {
1743         u64 bytenr;
1744         u64 ptr_gen;
1745         u32 nritems;
1746         u32 blocksize;
1747         int i;
1748         int level;
1749
1750         level = btrfs_header_level(node);
1751         if (level != 1)
1752                 return;
1753
1754         nritems = btrfs_header_nritems(node);
1755         blocksize = btrfs_level_size(root, level - 1);
1756         for (i = slot; i < nritems; i++) {
1757                 bytenr = btrfs_node_blockptr(node, i);
1758                 ptr_gen = btrfs_node_ptr_generation(node, i);
1759                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1760         }
1761 }
1762
1763 /*
1764  * Check the child node/leaf by the following condition:
1765  * 1. the first item key of the node/leaf should be the same with the one
1766  *    in parent.
1767  * 2. block in parent node should match the child node/leaf.
1768  * 3. generation of parent node and child's header should be consistent.
1769  *
1770  * Or the child node/leaf pointed by the key in parent is not valid.
1771  *
1772  * We hope to check leaf owner too, but since subvol may share leaves,
1773  * which makes leaf owner check not so strong, key check should be
1774  * sufficient enough for that case.
1775  */
1776 static int check_child_node(struct btrfs_root *root,
1777                             struct extent_buffer *parent, int slot,
1778                             struct extent_buffer *child)
1779 {
1780         struct btrfs_key parent_key;
1781         struct btrfs_key child_key;
1782         int ret = 0;
1783
1784         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1785         if (btrfs_header_level(child) == 0)
1786                 btrfs_item_key_to_cpu(child, &child_key, 0);
1787         else
1788                 btrfs_node_key_to_cpu(child, &child_key, 0);
1789
1790         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1791                 ret = -EINVAL;
1792                 fprintf(stderr,
1793                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1794                         parent_key.objectid, parent_key.type, parent_key.offset,
1795                         child_key.objectid, child_key.type, child_key.offset);
1796         }
1797         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1798                 ret = -EINVAL;
1799                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1800                         btrfs_node_blockptr(parent, slot),
1801                         btrfs_header_bytenr(child));
1802         }
1803         if (btrfs_node_ptr_generation(parent, slot) !=
1804             btrfs_header_generation(child)) {
1805                 ret = -EINVAL;
1806                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1807                         btrfs_header_generation(child),
1808                         btrfs_node_ptr_generation(parent, slot));
1809         }
1810         return ret;
1811 }
1812
1813 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1814                           struct walk_control *wc, int *level)
1815 {
1816         enum btrfs_tree_block_status status;
1817         u64 bytenr;
1818         u64 ptr_gen;
1819         struct extent_buffer *next;
1820         struct extent_buffer *cur;
1821         u32 blocksize;
1822         int ret, err = 0;
1823         u64 refs;
1824
1825         WARN_ON(*level < 0);
1826         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1827         ret = btrfs_lookup_extent_info(NULL, root,
1828                                        path->nodes[*level]->start,
1829                                        *level, 1, &refs, NULL);
1830         if (ret < 0) {
1831                 err = ret;
1832                 goto out;
1833         }
1834
1835         if (refs > 1) {
1836                 ret = enter_shared_node(root, path->nodes[*level]->start,
1837                                         refs, wc, *level);
1838                 if (ret > 0) {
1839                         err = ret;
1840                         goto out;
1841                 }
1842         }
1843
1844         while (*level >= 0) {
1845                 WARN_ON(*level < 0);
1846                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1847                 cur = path->nodes[*level];
1848
1849                 if (btrfs_header_level(cur) != *level)
1850                         WARN_ON(1);
1851
1852                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1853                         break;
1854                 if (*level == 0) {
1855                         ret = process_one_leaf(root, cur, wc);
1856                         if (ret < 0)
1857                                 err = ret;
1858                         break;
1859                 }
1860                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1861                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1862                 blocksize = btrfs_level_size(root, *level - 1);
1863                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1864                                                1, &refs, NULL);
1865                 if (ret < 0)
1866                         refs = 0;
1867
1868                 if (refs > 1) {
1869                         ret = enter_shared_node(root, bytenr, refs,
1870                                                 wc, *level - 1);
1871                         if (ret > 0) {
1872                                 path->slots[*level]++;
1873                                 continue;
1874                         }
1875                 }
1876
1877                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1878                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1879                         free_extent_buffer(next);
1880                         reada_walk_down(root, cur, path->slots[*level]);
1881                         next = read_tree_block(root, bytenr, blocksize,
1882                                                ptr_gen);
1883                         if (!extent_buffer_uptodate(next)) {
1884                                 struct btrfs_key node_key;
1885
1886                                 btrfs_node_key_to_cpu(path->nodes[*level],
1887                                                       &node_key,
1888                                                       path->slots[*level]);
1889                                 btrfs_add_corrupt_extent_record(root->fs_info,
1890                                                 &node_key,
1891                                                 path->nodes[*level]->start,
1892                                                 root->leafsize, *level);
1893                                 err = -EIO;
1894                                 goto out;
1895                         }
1896                 }
1897
1898                 ret = check_child_node(root, cur, path->slots[*level], next);
1899                 if (ret) {
1900                         err = ret;
1901                         goto out;
1902                 }
1903
1904                 if (btrfs_is_leaf(next))
1905                         status = btrfs_check_leaf(root, NULL, next);
1906                 else
1907                         status = btrfs_check_node(root, NULL, next);
1908                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1909                         free_extent_buffer(next);
1910                         err = -EIO;
1911                         goto out;
1912                 }
1913
1914                 *level = *level - 1;
1915                 free_extent_buffer(path->nodes[*level]);
1916                 path->nodes[*level] = next;
1917                 path->slots[*level] = 0;
1918         }
1919 out:
1920         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1921         return err;
1922 }
1923
1924 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1925                         struct walk_control *wc, int *level)
1926 {
1927         int i;
1928         struct extent_buffer *leaf;
1929
1930         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1931                 leaf = path->nodes[i];
1932                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1933                         path->slots[i]++;
1934                         *level = i;
1935                         return 0;
1936                 } else {
1937                         free_extent_buffer(path->nodes[*level]);
1938                         path->nodes[*level] = NULL;
1939                         BUG_ON(*level > wc->active_node);
1940                         if (*level == wc->active_node)
1941                                 leave_shared_node(root, wc, *level);
1942                         *level = i + 1;
1943                 }
1944         }
1945         return 1;
1946 }
1947
1948 static int check_root_dir(struct inode_record *rec)
1949 {
1950         struct inode_backref *backref;
1951         int ret = -1;
1952
1953         if (!rec->found_inode_item || rec->errors)
1954                 goto out;
1955         if (rec->nlink != 1 || rec->found_link != 0)
1956                 goto out;
1957         if (list_empty(&rec->backrefs))
1958                 goto out;
1959         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1960         if (!backref->found_inode_ref)
1961                 goto out;
1962         if (backref->index != 0 || backref->namelen != 2 ||
1963             memcmp(backref->name, "..", 2))
1964                 goto out;
1965         if (backref->found_dir_index || backref->found_dir_item)
1966                 goto out;
1967         ret = 0;
1968 out:
1969         return ret;
1970 }
1971
1972 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1973                               struct btrfs_root *root, struct btrfs_path *path,
1974                               struct inode_record *rec)
1975 {
1976         struct btrfs_inode_item *ei;
1977         struct btrfs_key key;
1978         int ret;
1979
1980         key.objectid = rec->ino;
1981         key.type = BTRFS_INODE_ITEM_KEY;
1982         key.offset = (u64)-1;
1983
1984         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1985         if (ret < 0)
1986                 goto out;
1987         if (ret) {
1988                 if (!path->slots[0]) {
1989                         ret = -ENOENT;
1990                         goto out;
1991                 }
1992                 path->slots[0]--;
1993                 ret = 0;
1994         }
1995         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1996         if (key.objectid != rec->ino) {
1997                 ret = -ENOENT;
1998                 goto out;
1999         }
2000
2001         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2002                             struct btrfs_inode_item);
2003         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2004         btrfs_mark_buffer_dirty(path->nodes[0]);
2005         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2006         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2007                root->root_key.objectid);
2008 out:
2009         btrfs_release_path(path);
2010         return ret;
2011 }
2012
2013 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2014                                     struct btrfs_root *root,
2015                                     struct btrfs_path *path,
2016                                     struct inode_record *rec)
2017 {
2018         int ret;
2019
2020         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2021         btrfs_release_path(path);
2022         if (!ret)
2023                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2024         return ret;
2025 }
2026
2027 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2028                                struct btrfs_root *root,
2029                                struct btrfs_path *path,
2030                                struct inode_record *rec)
2031 {
2032         struct btrfs_inode_item *ei;
2033         struct btrfs_key key;
2034         int ret = 0;
2035
2036         key.objectid = rec->ino;
2037         key.type = BTRFS_INODE_ITEM_KEY;
2038         key.offset = 0;
2039
2040         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2041         if (ret) {
2042                 if (ret > 0)
2043                         ret = -ENOENT;
2044                 goto out;
2045         }
2046
2047         /* Since ret == 0, no need to check anything */
2048         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2049                             struct btrfs_inode_item);
2050         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2051         btrfs_mark_buffer_dirty(path->nodes[0]);
2052         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2053         printf("reset nbytes for ino %llu root %llu\n",
2054                rec->ino, root->root_key.objectid);
2055 out:
2056         btrfs_release_path(path);
2057         return ret;
2058 }
2059
2060 static int add_missing_dir_index(struct btrfs_root *root,
2061                                  struct cache_tree *inode_cache,
2062                                  struct inode_record *rec,
2063                                  struct inode_backref *backref)
2064 {
2065         struct btrfs_path *path;
2066         struct btrfs_trans_handle *trans;
2067         struct btrfs_dir_item *dir_item;
2068         struct extent_buffer *leaf;
2069         struct btrfs_key key;
2070         struct btrfs_disk_key disk_key;
2071         struct inode_record *dir_rec;
2072         unsigned long name_ptr;
2073         u32 data_size = sizeof(*dir_item) + backref->namelen;
2074         int ret;
2075
2076         path = btrfs_alloc_path();
2077         if (!path)
2078                 return -ENOMEM;
2079
2080         trans = btrfs_start_transaction(root, 1);
2081         if (IS_ERR(trans)) {
2082                 btrfs_free_path(path);
2083                 return PTR_ERR(trans);
2084         }
2085
2086         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2087                 (unsigned long long)rec->ino);
2088         key.objectid = backref->dir;
2089         key.type = BTRFS_DIR_INDEX_KEY;
2090         key.offset = backref->index;
2091
2092         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2093         BUG_ON(ret);
2094
2095         leaf = path->nodes[0];
2096         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2097
2098         disk_key.objectid = cpu_to_le64(rec->ino);
2099         disk_key.type = BTRFS_INODE_ITEM_KEY;
2100         disk_key.offset = 0;
2101
2102         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2103         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2104         btrfs_set_dir_data_len(leaf, dir_item, 0);
2105         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2106         name_ptr = (unsigned long)(dir_item + 1);
2107         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2108         btrfs_mark_buffer_dirty(leaf);
2109         btrfs_free_path(path);
2110         btrfs_commit_transaction(trans, root);
2111
2112         backref->found_dir_index = 1;
2113         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2114         BUG_ON(IS_ERR(dir_rec));
2115         if (!dir_rec)
2116                 return 0;
2117         dir_rec->found_size += backref->namelen;
2118         if (dir_rec->found_size == dir_rec->isize &&
2119             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2120                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2121         if (dir_rec->found_size != dir_rec->isize)
2122                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2123
2124         return 0;
2125 }
2126
2127 static int delete_dir_index(struct btrfs_root *root,
2128                             struct cache_tree *inode_cache,
2129                             struct inode_record *rec,
2130                             struct inode_backref *backref)
2131 {
2132         struct btrfs_trans_handle *trans;
2133         struct btrfs_dir_item *di;
2134         struct btrfs_path *path;
2135         int ret = 0;
2136
2137         path = btrfs_alloc_path();
2138         if (!path)
2139                 return -ENOMEM;
2140
2141         trans = btrfs_start_transaction(root, 1);
2142         if (IS_ERR(trans)) {
2143                 btrfs_free_path(path);
2144                 return PTR_ERR(trans);
2145         }
2146
2147
2148         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2149                 (unsigned long long)backref->dir,
2150                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2151                 (unsigned long long)root->objectid);
2152
2153         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2154                                     backref->name, backref->namelen,
2155                                     backref->index, -1);
2156         if (IS_ERR(di)) {
2157                 ret = PTR_ERR(di);
2158                 btrfs_free_path(path);
2159                 btrfs_commit_transaction(trans, root);
2160                 if (ret == -ENOENT)
2161                         return 0;
2162                 return ret;
2163         }
2164
2165         if (!di)
2166                 ret = btrfs_del_item(trans, root, path);
2167         else
2168                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2169         BUG_ON(ret);
2170         btrfs_free_path(path);
2171         btrfs_commit_transaction(trans, root);
2172         return ret;
2173 }
2174
2175 static int create_inode_item(struct btrfs_root *root,
2176                              struct inode_record *rec,
2177                              struct inode_backref *backref, int root_dir)
2178 {
2179         struct btrfs_trans_handle *trans;
2180         struct btrfs_inode_item inode_item;
2181         time_t now = time(NULL);
2182         int ret;
2183
2184         trans = btrfs_start_transaction(root, 1);
2185         if (IS_ERR(trans)) {
2186                 ret = PTR_ERR(trans);
2187                 return ret;
2188         }
2189
2190         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2191                 "be incomplete, please check permissions and content after "
2192                 "the fsck completes.\n", (unsigned long long)root->objectid,
2193                 (unsigned long long)rec->ino);
2194
2195         memset(&inode_item, 0, sizeof(inode_item));
2196         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2197         if (root_dir)
2198                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2199         else
2200                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2201         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2202         if (rec->found_dir_item) {
2203                 if (rec->found_file_extent)
2204                         fprintf(stderr, "root %llu inode %llu has both a dir "
2205                                 "item and extents, unsure if it is a dir or a "
2206                                 "regular file so setting it as a directory\n",
2207                                 (unsigned long long)root->objectid,
2208                                 (unsigned long long)rec->ino);
2209                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2210                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2211         } else if (!rec->found_dir_item) {
2212                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2213                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2214         }
2215         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2216         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2217         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2218         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2219         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2220         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2221         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2222         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2223
2224         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2225         BUG_ON(ret);
2226         btrfs_commit_transaction(trans, root);
2227         return 0;
2228 }
2229
2230 static int repair_inode_backrefs(struct btrfs_root *root,
2231                                  struct inode_record *rec,
2232                                  struct cache_tree *inode_cache,
2233                                  int delete)
2234 {
2235         struct inode_backref *tmp, *backref;
2236         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2237         int ret = 0;
2238         int repaired = 0;
2239
2240         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2241                 if (!delete && rec->ino == root_dirid) {
2242                         if (!rec->found_inode_item) {
2243                                 ret = create_inode_item(root, rec, backref, 1);
2244                                 if (ret)
2245                                         break;
2246                                 repaired++;
2247                         }
2248                 }
2249
2250                 /* Index 0 for root dir's are special, don't mess with it */
2251                 if (rec->ino == root_dirid && backref->index == 0)
2252                         continue;
2253
2254                 if (delete &&
2255                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2256                      (backref->found_dir_index && backref->found_inode_ref &&
2257                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2258                         ret = delete_dir_index(root, inode_cache, rec, backref);
2259                         if (ret)
2260                                 break;
2261                         repaired++;
2262                         list_del(&backref->list);
2263                         free(backref);
2264                 }
2265
2266                 if (!delete && !backref->found_dir_index &&
2267                     backref->found_dir_item && backref->found_inode_ref) {
2268                         ret = add_missing_dir_index(root, inode_cache, rec,
2269                                                     backref);
2270                         if (ret)
2271                                 break;
2272                         repaired++;
2273                         if (backref->found_dir_item &&
2274                             backref->found_dir_index &&
2275                             backref->found_dir_index) {
2276                                 if (!backref->errors &&
2277                                     backref->found_inode_ref) {
2278                                         list_del(&backref->list);
2279                                         free(backref);
2280                                 }
2281                         }
2282                 }
2283
2284                 if (!delete && (!backref->found_dir_index &&
2285                                 !backref->found_dir_item &&
2286                                 backref->found_inode_ref)) {
2287                         struct btrfs_trans_handle *trans;
2288                         struct btrfs_key location;
2289
2290                         ret = check_dir_conflict(root, backref->name,
2291                                                  backref->namelen,
2292                                                  backref->dir,
2293                                                  backref->index);
2294                         if (ret) {
2295                                 /*
2296                                  * let nlink fixing routine to handle it,
2297                                  * which can do it better.
2298                                  */
2299                                 ret = 0;
2300                                 break;
2301                         }
2302                         location.objectid = rec->ino;
2303                         location.type = BTRFS_INODE_ITEM_KEY;
2304                         location.offset = 0;
2305
2306                         trans = btrfs_start_transaction(root, 1);
2307                         if (IS_ERR(trans)) {
2308                                 ret = PTR_ERR(trans);
2309                                 break;
2310                         }
2311                         fprintf(stderr, "adding missing dir index/item pair "
2312                                 "for inode %llu\n",
2313                                 (unsigned long long)rec->ino);
2314                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2315                                                     backref->namelen,
2316                                                     backref->dir, &location,
2317                                                     imode_to_type(rec->imode),
2318                                                     backref->index);
2319                         BUG_ON(ret);
2320                         btrfs_commit_transaction(trans, root);
2321                         repaired++;
2322                 }
2323
2324                 if (!delete && (backref->found_inode_ref &&
2325                                 backref->found_dir_index &&
2326                                 backref->found_dir_item &&
2327                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2328                                 !rec->found_inode_item)) {
2329                         ret = create_inode_item(root, rec, backref, 0);
2330                         if (ret)
2331                                 break;
2332                         repaired++;
2333                 }
2334
2335         }
2336         return ret ? ret : repaired;
2337 }
2338
2339 /*
2340  * To determine the file type for nlink/inode_item repair
2341  *
2342  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2343  * Return -ENOENT if file type is not found.
2344  */
2345 static int find_file_type(struct inode_record *rec, u8 *type)
2346 {
2347         struct inode_backref *backref;
2348
2349         /* For inode item recovered case */
2350         if (rec->found_inode_item) {
2351                 *type = imode_to_type(rec->imode);
2352                 return 0;
2353         }
2354
2355         list_for_each_entry(backref, &rec->backrefs, list) {
2356                 if (backref->found_dir_index || backref->found_dir_item) {
2357                         *type = backref->filetype;
2358                         return 0;
2359                 }
2360         }
2361         return -ENOENT;
2362 }
2363
2364 /*
2365  * To determine the file name for nlink repair
2366  *
2367  * Return 0 if file name is found, set name and namelen.
2368  * Return -ENOENT if file name is not found.
2369  */
2370 static int find_file_name(struct inode_record *rec,
2371                           char *name, int *namelen)
2372 {
2373         struct inode_backref *backref;
2374
2375         list_for_each_entry(backref, &rec->backrefs, list) {
2376                 if (backref->found_dir_index || backref->found_dir_item ||
2377                     backref->found_inode_ref) {
2378                         memcpy(name, backref->name, backref->namelen);
2379                         *namelen = backref->namelen;
2380                         return 0;
2381                 }
2382         }
2383         return -ENOENT;
2384 }
2385
2386 /* Reset the nlink of the inode to the correct one */
2387 static int reset_nlink(struct btrfs_trans_handle *trans,
2388                        struct btrfs_root *root,
2389                        struct btrfs_path *path,
2390                        struct inode_record *rec)
2391 {
2392         struct inode_backref *backref;
2393         struct inode_backref *tmp;
2394         struct btrfs_key key;
2395         struct btrfs_inode_item *inode_item;
2396         int ret = 0;
2397
2398         /* We don't believe this either, reset it and iterate backref */
2399         rec->found_link = 0;
2400
2401         /* Remove all backref including the valid ones */
2402         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2403                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2404                                    backref->index, backref->name,
2405                                    backref->namelen, 0);
2406                 if (ret < 0)
2407                         goto out;
2408
2409                 /* remove invalid backref, so it won't be added back */
2410                 if (!(backref->found_dir_index &&
2411                       backref->found_dir_item &&
2412                       backref->found_inode_ref)) {
2413                         list_del(&backref->list);
2414                         free(backref);
2415                 } else {
2416                         rec->found_link++;
2417                 }
2418         }
2419
2420         /* Set nlink to 0 */
2421         key.objectid = rec->ino;
2422         key.type = BTRFS_INODE_ITEM_KEY;
2423         key.offset = 0;
2424         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2425         if (ret < 0)
2426                 goto out;
2427         if (ret > 0) {
2428                 ret = -ENOENT;
2429                 goto out;
2430         }
2431         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2432                                     struct btrfs_inode_item);
2433         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2434         btrfs_mark_buffer_dirty(path->nodes[0]);
2435         btrfs_release_path(path);
2436
2437         /*
2438          * Add back valid inode_ref/dir_item/dir_index,
2439          * add_link() will handle the nlink inc, so new nlink must be correct
2440          */
2441         list_for_each_entry(backref, &rec->backrefs, list) {
2442                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2443                                      backref->name, backref->namelen,
2444                                      backref->filetype, &backref->index, 1);
2445                 if (ret < 0)
2446                         goto out;
2447         }
2448 out:
2449         btrfs_release_path(path);
2450         return ret;
2451 }
2452
2453 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2454                                struct btrfs_root *root,
2455                                struct btrfs_path *path,
2456                                struct inode_record *rec)
2457 {
2458         char *dir_name = "lost+found";
2459         char namebuf[BTRFS_NAME_LEN] = {0};
2460         u64 lost_found_ino;
2461         u32 mode = 0700;
2462         u8 type = 0;
2463         int namelen = 0;
2464         int name_recovered = 0;
2465         int type_recovered = 0;
2466         int ret = 0;
2467
2468         /*
2469          * Get file name and type first before these invalid inode ref
2470          * are deleted by remove_all_invalid_backref()
2471          */
2472         name_recovered = !find_file_name(rec, namebuf, &namelen);
2473         type_recovered = !find_file_type(rec, &type);
2474
2475         if (!name_recovered) {
2476                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2477                        rec->ino, rec->ino);
2478                 namelen = count_digits(rec->ino);
2479                 sprintf(namebuf, "%llu", rec->ino);
2480                 name_recovered = 1;
2481         }
2482         if (!type_recovered) {
2483                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2484                        rec->ino);
2485                 type = BTRFS_FT_REG_FILE;
2486                 type_recovered = 1;
2487         }
2488
2489         ret = reset_nlink(trans, root, path, rec);
2490         if (ret < 0) {
2491                 fprintf(stderr,
2492                         "Failed to reset nlink for inode %llu: %s\n",
2493                         rec->ino, strerror(-ret));
2494                 goto out;
2495         }
2496
2497         if (rec->found_link == 0) {
2498                 lost_found_ino = root->highest_inode;
2499                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2500                         ret = -EOVERFLOW;
2501                         goto out;
2502                 }
2503                 lost_found_ino++;
2504                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2505                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2506                                   mode);
2507                 if (ret < 0) {
2508                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2509                                 dir_name, strerror(-ret));
2510                         goto out;
2511                 }
2512                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2513                                      namebuf, namelen, type, NULL, 1);
2514                 /*
2515                  * Add ".INO" suffix several times to handle case where
2516                  * "FILENAME.INO" is already taken by another file.
2517                  */
2518                 while (ret == -EEXIST) {
2519                         /*
2520                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2521                          */
2522                         if (namelen + count_digits(rec->ino) + 1 >
2523                             BTRFS_NAME_LEN) {
2524                                 ret = -EFBIG;
2525                                 goto out;
2526                         }
2527                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2528                                  ".%llu", rec->ino);
2529                         namelen += count_digits(rec->ino) + 1;
2530                         ret = btrfs_add_link(trans, root, rec->ino,
2531                                              lost_found_ino, namebuf,
2532                                              namelen, type, NULL, 1);
2533                 }
2534                 if (ret < 0) {
2535                         fprintf(stderr,
2536                                 "Failed to link the inode %llu to %s dir: %s\n",
2537                                 rec->ino, dir_name, strerror(-ret));
2538                         goto out;
2539                 }
2540                 /*
2541                  * Just increase the found_link, don't actually add the
2542                  * backref. This will make things easier and this inode
2543                  * record will be freed after the repair is done.
2544                  * So fsck will not report problem about this inode.
2545                  */
2546                 rec->found_link++;
2547                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2548                        namelen, namebuf, dir_name);
2549         }
2550         printf("Fixed the nlink of inode %llu\n", rec->ino);
2551 out:
2552         /*
2553          * Clear the flag anyway, or we will loop forever for the same inode
2554          * as it will not be removed from the bad inode list and the dead loop
2555          * happens.
2556          */
2557         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2558         btrfs_release_path(path);
2559         return ret;
2560 }
2561
2562 /*
2563  * Check if there is any normal(reg or prealloc) file extent for given
2564  * ino.
2565  * This is used to determine the file type when neither its dir_index/item or
2566  * inode_item exists.
2567  *
2568  * This will *NOT* report error, if any error happens, just consider it does
2569  * not have any normal file extent.
2570  */
2571 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2572 {
2573         struct btrfs_path *path;
2574         struct btrfs_key key;
2575         struct btrfs_key found_key;
2576         struct btrfs_file_extent_item *fi;
2577         u8 type;
2578         int ret = 0;
2579
2580         path = btrfs_alloc_path();
2581         if (!path)
2582                 goto out;
2583         key.objectid = ino;
2584         key.type = BTRFS_EXTENT_DATA_KEY;
2585         key.offset = 0;
2586
2587         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2588         if (ret < 0) {
2589                 ret = 0;
2590                 goto out;
2591         }
2592         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2593                 ret = btrfs_next_leaf(root, path);
2594                 if (ret) {
2595                         ret = 0;
2596                         goto out;
2597                 }
2598         }
2599         while (1) {
2600                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2601                                       path->slots[0]);
2602                 if (found_key.objectid != ino ||
2603                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2604                         break;
2605                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2606                                     struct btrfs_file_extent_item);
2607                 type = btrfs_file_extent_type(path->nodes[0], fi);
2608                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2609                         ret = 1;
2610                         goto out;
2611                 }
2612         }
2613 out:
2614         btrfs_free_path(path);
2615         return ret;
2616 }
2617
2618 static u32 btrfs_type_to_imode(u8 type)
2619 {
2620         static u32 imode_by_btrfs_type[] = {
2621                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2622                 [BTRFS_FT_DIR]          = S_IFDIR,
2623                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2624                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2625                 [BTRFS_FT_FIFO]         = S_IFIFO,
2626                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2627                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2628         };
2629
2630         return imode_by_btrfs_type[(type)];
2631 }
2632
2633 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2634                                 struct btrfs_root *root,
2635                                 struct btrfs_path *path,
2636                                 struct inode_record *rec)
2637 {
2638         u8 filetype;
2639         u32 mode = 0700;
2640         int type_recovered = 0;
2641         int ret = 0;
2642
2643         printf("Trying to rebuild inode:%llu\n", rec->ino);
2644
2645         type_recovered = !find_file_type(rec, &filetype);
2646
2647         /*
2648          * Try to determine inode type if type not found.
2649          *
2650          * For found regular file extent, it must be FILE.
2651          * For found dir_item/index, it must be DIR.
2652          *
2653          * For undetermined one, use FILE as fallback.
2654          *
2655          * TODO:
2656          * 1. If found backref(inode_index/item is already handled) to it,
2657          *    it must be DIR.
2658          *    Need new inode-inode ref structure to allow search for that.
2659          */
2660         if (!type_recovered) {
2661                 if (rec->found_file_extent &&
2662                     find_normal_file_extent(root, rec->ino)) {
2663                         type_recovered = 1;
2664                         filetype = BTRFS_FT_REG_FILE;
2665                 } else if (rec->found_dir_item) {
2666                         type_recovered = 1;
2667                         filetype = BTRFS_FT_DIR;
2668                 } else if (!list_empty(&rec->orphan_extents)) {
2669                         type_recovered = 1;
2670                         filetype = BTRFS_FT_REG_FILE;
2671                 } else{
2672                         printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2673                                rec->ino);
2674                         type_recovered = 1;
2675                         filetype = BTRFS_FT_REG_FILE;
2676                 }
2677         }
2678
2679         ret = btrfs_new_inode(trans, root, rec->ino,
2680                               mode | btrfs_type_to_imode(filetype));
2681         if (ret < 0)
2682                 goto out;
2683
2684         /*
2685          * Here inode rebuild is done, we only rebuild the inode item,
2686          * don't repair the nlink(like move to lost+found).
2687          * That is the job of nlink repair.
2688          *
2689          * We just fill the record and return
2690          */
2691         rec->found_dir_item = 1;
2692         rec->imode = mode | btrfs_type_to_imode(filetype);
2693         rec->nlink = 0;
2694         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2695         /* Ensure the inode_nlinks repair function will be called */
2696         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2697 out:
2698         return ret;
2699 }
2700
2701 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2702                                       struct btrfs_root *root,
2703                                       struct btrfs_path *path,
2704                                       struct inode_record *rec)
2705 {
2706         struct orphan_data_extent *orphan;
2707         struct orphan_data_extent *tmp;
2708         int ret = 0;
2709
2710         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2711                 /*
2712                  * Check for conflicting file extents
2713                  *
2714                  * Here we don't know whether the extents is compressed or not,
2715                  * so we can only assume it not compressed nor data offset,
2716                  * and use its disk_len as extent length.
2717                  */
2718                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2719                                        orphan->offset, orphan->disk_len, 0);
2720                 btrfs_release_path(path);
2721                 if (ret < 0)
2722                         goto out;
2723                 if (!ret) {
2724                         fprintf(stderr,
2725                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2726                                 orphan->disk_bytenr, orphan->disk_len);
2727                         ret = btrfs_free_extent(trans,
2728                                         root->fs_info->extent_root,
2729                                         orphan->disk_bytenr, orphan->disk_len,
2730                                         0, root->objectid, orphan->objectid,
2731                                         orphan->offset);
2732                         if (ret < 0)
2733                                 goto out;
2734                 }
2735                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2736                                 orphan->offset, orphan->disk_bytenr,
2737                                 orphan->disk_len, orphan->disk_len);
2738                 if (ret < 0)
2739                         goto out;
2740
2741                 /* Update file size info */
2742                 rec->found_size += orphan->disk_len;
2743                 if (rec->found_size == rec->nbytes)
2744                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2745
2746                 /* Update the file extent hole info too */
2747                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2748                                            orphan->disk_len);
2749                 if (ret < 0)
2750                         goto out;
2751                 if (RB_EMPTY_ROOT(&rec->holes))
2752                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2753
2754                 list_del(&orphan->list);
2755                 free(orphan);
2756         }
2757         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2758 out:
2759         return ret;
2760 }
2761
2762 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2763                                         struct btrfs_root *root,
2764                                         struct btrfs_path *path,
2765                                         struct inode_record *rec)
2766 {
2767         struct rb_node *node;
2768         struct file_extent_hole *hole;
2769         int found = 0;
2770         int ret = 0;
2771
2772         node = rb_first(&rec->holes);
2773
2774         while (node) {
2775                 found = 1;
2776                 hole = rb_entry(node, struct file_extent_hole, node);
2777                 ret = btrfs_punch_hole(trans, root, rec->ino,
2778                                        hole->start, hole->len);
2779                 if (ret < 0)
2780                         goto out;
2781                 ret = del_file_extent_hole(&rec->holes, hole->start,
2782                                            hole->len);
2783                 if (ret < 0)
2784                         goto out;
2785                 if (RB_EMPTY_ROOT(&rec->holes))
2786                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2787                 node = rb_first(&rec->holes);
2788         }
2789         /* special case for a file losing all its file extent */
2790         if (!found) {
2791                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2792                                        round_up(rec->isize, root->sectorsize));
2793                 if (ret < 0)
2794                         goto out;
2795         }
2796         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2797                rec->ino, root->objectid);
2798 out:
2799         return ret;
2800 }
2801
2802 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2803 {
2804         struct btrfs_trans_handle *trans;
2805         struct btrfs_path *path;
2806         int ret = 0;
2807
2808         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2809                              I_ERR_NO_ORPHAN_ITEM |
2810                              I_ERR_LINK_COUNT_WRONG |
2811                              I_ERR_NO_INODE_ITEM |
2812                              I_ERR_FILE_EXTENT_ORPHAN |
2813                              I_ERR_FILE_EXTENT_DISCOUNT|
2814                              I_ERR_FILE_NBYTES_WRONG)))
2815                 return rec->errors;
2816
2817         path = btrfs_alloc_path();
2818         if (!path)
2819                 return -ENOMEM;
2820
2821         /*
2822          * For nlink repair, it may create a dir and add link, so
2823          * 2 for parent(256)'s dir_index and dir_item
2824          * 2 for lost+found dir's inode_item and inode_ref
2825          * 1 for the new inode_ref of the file
2826          * 2 for lost+found dir's dir_index and dir_item for the file
2827          */
2828         trans = btrfs_start_transaction(root, 7);
2829         if (IS_ERR(trans)) {
2830                 btrfs_free_path(path);
2831                 return PTR_ERR(trans);
2832         }
2833
2834         if (rec->errors & I_ERR_NO_INODE_ITEM)
2835                 ret = repair_inode_no_item(trans, root, path, rec);
2836         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2837                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2838         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2839                 ret = repair_inode_discount_extent(trans, root, path, rec);
2840         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2841                 ret = repair_inode_isize(trans, root, path, rec);
2842         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2843                 ret = repair_inode_orphan_item(trans, root, path, rec);
2844         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2845                 ret = repair_inode_nlinks(trans, root, path, rec);
2846         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2847                 ret = repair_inode_nbytes(trans, root, path, rec);
2848         btrfs_commit_transaction(trans, root);
2849         btrfs_free_path(path);
2850         return ret;
2851 }
2852
2853 static int check_inode_recs(struct btrfs_root *root,
2854                             struct cache_tree *inode_cache)
2855 {
2856         struct cache_extent *cache;
2857         struct ptr_node *node;
2858         struct inode_record *rec;
2859         struct inode_backref *backref;
2860         int stage = 0;
2861         int ret = 0;
2862         int err = 0;
2863         u64 error = 0;
2864         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2865
2866         if (btrfs_root_refs(&root->root_item) == 0) {
2867                 if (!cache_tree_empty(inode_cache))
2868                         fprintf(stderr, "warning line %d\n", __LINE__);
2869                 return 0;
2870         }
2871
2872         /*
2873          * We need to record the highest inode number for later 'lost+found'
2874          * dir creation.
2875          * We must select a ino not used/refered by any existing inode, or
2876          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2877          * this may cause 'lost+found' dir has wrong nlinks.
2878          */
2879         cache = last_cache_extent(inode_cache);
2880         if (cache) {
2881                 node = container_of(cache, struct ptr_node, cache);
2882                 rec = node->data;
2883                 if (rec->ino > root->highest_inode)
2884                         root->highest_inode = rec->ino;
2885         }
2886
2887         /*
2888          * We need to repair backrefs first because we could change some of the
2889          * errors in the inode recs.
2890          *
2891          * We also need to go through and delete invalid backrefs first and then
2892          * add the correct ones second.  We do this because we may get EEXIST
2893          * when adding back the correct index because we hadn't yet deleted the
2894          * invalid index.
2895          *
2896          * For example, if we were missing a dir index then the directories
2897          * isize would be wrong, so if we fixed the isize to what we thought it
2898          * would be and then fixed the backref we'd still have a invalid fs, so
2899          * we need to add back the dir index and then check to see if the isize
2900          * is still wrong.
2901          */
2902         while (stage < 3) {
2903                 stage++;
2904                 if (stage == 3 && !err)
2905                         break;
2906
2907                 cache = search_cache_extent(inode_cache, 0);
2908                 while (repair && cache) {
2909                         node = container_of(cache, struct ptr_node, cache);
2910                         rec = node->data;
2911                         cache = next_cache_extent(cache);
2912
2913                         /* Need to free everything up and rescan */
2914                         if (stage == 3) {
2915                                 remove_cache_extent(inode_cache, &node->cache);
2916                                 free(node);
2917                                 free_inode_rec(rec);
2918                                 continue;
2919                         }
2920
2921                         if (list_empty(&rec->backrefs))
2922                                 continue;
2923
2924                         ret = repair_inode_backrefs(root, rec, inode_cache,
2925                                                     stage == 1);
2926                         if (ret < 0) {
2927                                 err = ret;
2928                                 stage = 2;
2929                                 break;
2930                         } if (ret > 0) {
2931                                 err = -EAGAIN;
2932                         }
2933                 }
2934         }
2935         if (err)
2936                 return err;
2937
2938         rec = get_inode_rec(inode_cache, root_dirid, 0);
2939         BUG_ON(IS_ERR(rec));
2940         if (rec) {
2941                 ret = check_root_dir(rec);
2942                 if (ret) {
2943                         fprintf(stderr, "root %llu root dir %llu error\n",
2944                                 (unsigned long long)root->root_key.objectid,
2945                                 (unsigned long long)root_dirid);
2946                         print_inode_error(root, rec);
2947                         error++;
2948                 }
2949         } else {
2950                 if (repair) {
2951                         struct btrfs_trans_handle *trans;
2952
2953                         trans = btrfs_start_transaction(root, 1);
2954                         if (IS_ERR(trans)) {
2955                                 err = PTR_ERR(trans);
2956                                 return err;
2957                         }
2958
2959                         fprintf(stderr,
2960                                 "root %llu missing its root dir, recreating\n",
2961                                 (unsigned long long)root->objectid);
2962
2963                         ret = btrfs_make_root_dir(trans, root, root_dirid);
2964                         BUG_ON(ret);
2965
2966                         btrfs_commit_transaction(trans, root);
2967                         return -EAGAIN;
2968                 }
2969
2970                 fprintf(stderr, "root %llu root dir %llu not found\n",
2971                         (unsigned long long)root->root_key.objectid,
2972                         (unsigned long long)root_dirid);
2973         }
2974
2975         while (1) {
2976                 cache = search_cache_extent(inode_cache, 0);
2977                 if (!cache)
2978                         break;
2979                 node = container_of(cache, struct ptr_node, cache);
2980                 rec = node->data;
2981                 remove_cache_extent(inode_cache, &node->cache);
2982                 free(node);
2983                 if (rec->ino == root_dirid ||
2984                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
2985                         free_inode_rec(rec);
2986                         continue;
2987                 }
2988
2989                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2990                         ret = check_orphan_item(root, rec->ino);
2991                         if (ret == 0)
2992                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2993                         if (can_free_inode_rec(rec)) {
2994                                 free_inode_rec(rec);
2995                                 continue;
2996                         }
2997                 }
2998
2999                 if (!rec->found_inode_item)
3000                         rec->errors |= I_ERR_NO_INODE_ITEM;
3001                 if (rec->found_link != rec->nlink)
3002                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3003                 if (repair) {
3004                         ret = try_repair_inode(root, rec);
3005                         if (ret == 0 && can_free_inode_rec(rec)) {
3006                                 free_inode_rec(rec);
3007                                 continue;
3008                         }
3009                         ret = 0;
3010                 }
3011
3012                 if (!(repair && ret == 0))
3013                         error++;
3014                 print_inode_error(root, rec);
3015                 list_for_each_entry(backref, &rec->backrefs, list) {
3016                         if (!backref->found_dir_item)
3017                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3018                         if (!backref->found_dir_index)
3019                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3020                         if (!backref->found_inode_ref)
3021                                 backref->errors |= REF_ERR_NO_INODE_REF;
3022                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3023                                 " namelen %u name %s filetype %d errors %x",
3024                                 (unsigned long long)backref->dir,
3025                                 (unsigned long long)backref->index,
3026                                 backref->namelen, backref->name,
3027                                 backref->filetype, backref->errors);
3028                         print_ref_error(backref->errors);
3029                 }
3030                 free_inode_rec(rec);
3031         }
3032         return (error > 0) ? -1 : 0;
3033 }
3034
3035 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3036                                         u64 objectid)
3037 {
3038         struct cache_extent *cache;
3039         struct root_record *rec = NULL;
3040         int ret;
3041
3042         cache = lookup_cache_extent(root_cache, objectid, 1);
3043         if (cache) {
3044                 rec = container_of(cache, struct root_record, cache);
3045         } else {
3046                 rec = calloc(1, sizeof(*rec));
3047                 if (!rec)
3048                         return ERR_PTR(-ENOMEM);
3049                 rec->objectid = objectid;
3050                 INIT_LIST_HEAD(&rec->backrefs);
3051                 rec->cache.start = objectid;
3052                 rec->cache.size = 1;
3053
3054                 ret = insert_cache_extent(root_cache, &rec->cache);
3055                 if (ret)
3056                         return ERR_PTR(-EEXIST);
3057         }
3058         return rec;
3059 }
3060
3061 static struct root_backref *get_root_backref(struct root_record *rec,
3062                                              u64 ref_root, u64 dir, u64 index,
3063                                              const char *name, int namelen)
3064 {
3065         struct root_backref *backref;
3066
3067         list_for_each_entry(backref, &rec->backrefs, list) {
3068                 if (backref->ref_root != ref_root || backref->dir != dir ||
3069                     backref->namelen != namelen)
3070                         continue;
3071                 if (memcmp(name, backref->name, namelen))
3072                         continue;
3073                 return backref;
3074         }
3075
3076         backref = calloc(1, sizeof(*backref) + namelen + 1);
3077         if (!backref)
3078                 return NULL;
3079         backref->ref_root = ref_root;
3080         backref->dir = dir;
3081         backref->index = index;
3082         backref->namelen = namelen;
3083         memcpy(backref->name, name, namelen);
3084         backref->name[namelen] = '\0';
3085         list_add_tail(&backref->list, &rec->backrefs);
3086         return backref;
3087 }
3088
3089 static void free_root_record(struct cache_extent *cache)
3090 {
3091         struct root_record *rec;
3092         struct root_backref *backref;
3093
3094         rec = container_of(cache, struct root_record, cache);
3095         while (!list_empty(&rec->backrefs)) {
3096                 backref = list_entry(rec->backrefs.next,
3097                                      struct root_backref, list);
3098                 list_del(&backref->list);
3099                 free(backref);
3100         }
3101
3102         kfree(rec);
3103 }
3104
3105 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3106
3107 static int add_root_backref(struct cache_tree *root_cache,
3108                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3109                             const char *name, int namelen,
3110                             int item_type, int errors)
3111 {
3112         struct root_record *rec;
3113         struct root_backref *backref;
3114
3115         rec = get_root_rec(root_cache, root_id);
3116         BUG_ON(IS_ERR(rec));
3117         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3118         BUG_ON(!backref);
3119
3120         backref->errors |= errors;
3121
3122         if (item_type != BTRFS_DIR_ITEM_KEY) {
3123                 if (backref->found_dir_index || backref->found_back_ref ||
3124                     backref->found_forward_ref) {
3125                         if (backref->index != index)
3126                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3127                 } else {
3128                         backref->index = index;
3129                 }
3130         }
3131
3132         if (item_type == BTRFS_DIR_ITEM_KEY) {
3133                 if (backref->found_forward_ref)
3134                         rec->found_ref++;
3135                 backref->found_dir_item = 1;
3136         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3137                 backref->found_dir_index = 1;
3138         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3139                 if (backref->found_forward_ref)
3140                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3141                 else if (backref->found_dir_item)
3142                         rec->found_ref++;
3143                 backref->found_forward_ref = 1;
3144         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3145                 if (backref->found_back_ref)
3146                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3147                 backref->found_back_ref = 1;
3148         } else {
3149                 BUG_ON(1);
3150         }
3151
3152         if (backref->found_forward_ref && backref->found_dir_item)
3153                 backref->reachable = 1;
3154         return 0;
3155 }
3156
3157 static int merge_root_recs(struct btrfs_root *root,
3158                            struct cache_tree *src_cache,
3159                            struct cache_tree *dst_cache)
3160 {
3161         struct cache_extent *cache;
3162         struct ptr_node *node;
3163         struct inode_record *rec;
3164         struct inode_backref *backref;
3165         int ret = 0;
3166
3167         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3168                 free_inode_recs_tree(src_cache);
3169                 return 0;
3170         }
3171
3172         while (1) {
3173                 cache = search_cache_extent(src_cache, 0);
3174                 if (!cache)
3175                         break;
3176                 node = container_of(cache, struct ptr_node, cache);
3177                 rec = node->data;
3178                 remove_cache_extent(src_cache, &node->cache);
3179                 free(node);
3180
3181                 ret = is_child_root(root, root->objectid, rec->ino);
3182                 if (ret < 0)
3183                         break;
3184                 else if (ret == 0)
3185                         goto skip;
3186
3187                 list_for_each_entry(backref, &rec->backrefs, list) {
3188                         BUG_ON(backref->found_inode_ref);
3189                         if (backref->found_dir_item)
3190                                 add_root_backref(dst_cache, rec->ino,
3191                                         root->root_key.objectid, backref->dir,
3192                                         backref->index, backref->name,
3193                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3194                                         backref->errors);
3195                         if (backref->found_dir_index)
3196                                 add_root_backref(dst_cache, rec->ino,
3197                                         root->root_key.objectid, backref->dir,
3198                                         backref->index, backref->name,
3199                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3200                                         backref->errors);
3201                 }
3202 skip:
3203                 free_inode_rec(rec);
3204         }
3205         if (ret < 0)
3206                 return ret;
3207         return 0;
3208 }
3209
3210 static int check_root_refs(struct btrfs_root *root,
3211                            struct cache_tree *root_cache)
3212 {
3213         struct root_record *rec;
3214         struct root_record *ref_root;
3215         struct root_backref *backref;
3216         struct cache_extent *cache;
3217         int loop = 1;
3218         int ret;
3219         int error;
3220         int errors = 0;
3221
3222         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3223         BUG_ON(IS_ERR(rec));
3224         rec->found_ref = 1;
3225
3226         /* fixme: this can not detect circular references */
3227         while (loop) {
3228                 loop = 0;
3229                 cache = search_cache_extent(root_cache, 0);
3230                 while (1) {
3231                         if (!cache)
3232                                 break;
3233                         rec = container_of(cache, struct root_record, cache);
3234                         cache = next_cache_extent(cache);
3235
3236                         if (rec->found_ref == 0)
3237                                 continue;
3238
3239                         list_for_each_entry(backref, &rec->backrefs, list) {
3240                                 if (!backref->reachable)
3241                                         continue;
3242
3243                                 ref_root = get_root_rec(root_cache,
3244                                                         backref->ref_root);
3245                                 BUG_ON(IS_ERR(ref_root));
3246                                 if (ref_root->found_ref > 0)
3247                                         continue;
3248
3249                                 backref->reachable = 0;
3250                                 rec->found_ref--;
3251                                 if (rec->found_ref == 0)
3252                                         loop = 1;
3253                         }
3254                 }
3255         }
3256
3257         cache = search_cache_extent(root_cache, 0);
3258         while (1) {
3259                 if (!cache)
3260                         break;
3261                 rec = container_of(cache, struct root_record, cache);
3262                 cache = next_cache_extent(cache);
3263
3264                 if (rec->found_ref == 0 &&
3265                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3266                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3267                         ret = check_orphan_item(root->fs_info->tree_root,
3268                                                 rec->objectid);
3269                         if (ret == 0)
3270                                 continue;
3271
3272                         /*
3273                          * If we don't have a root item then we likely just have
3274                          * a dir item in a snapshot for this root but no actual
3275                          * ref key or anything so it's meaningless.
3276                          */
3277                         if (!rec->found_root_item)
3278                                 continue;
3279                         errors++;
3280                         fprintf(stderr, "fs tree %llu not referenced\n",
3281                                 (unsigned long long)rec->objectid);
3282                 }
3283
3284                 error = 0;
3285                 if (rec->found_ref > 0 && !rec->found_root_item)
3286                         error = 1;
3287                 list_for_each_entry(backref, &rec->backrefs, list) {
3288                         if (!backref->found_dir_item)
3289                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3290                         if (!backref->found_dir_index)
3291                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3292                         if (!backref->found_back_ref)
3293                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3294                         if (!backref->found_forward_ref)
3295                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3296                         if (backref->reachable && backref->errors)
3297                                 error = 1;
3298                 }
3299                 if (!error)
3300                         continue;
3301
3302                 errors++;
3303                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3304                         (unsigned long long)rec->objectid, rec->found_ref,
3305                          rec->found_root_item ? "" : "not found");
3306
3307                 list_for_each_entry(backref, &rec->backrefs, list) {
3308                         if (!backref->reachable)
3309                                 continue;
3310                         if (!backref->errors && rec->found_root_item)
3311                                 continue;
3312                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3313                                 " index %llu namelen %u name %s errors %x\n",
3314                                 (unsigned long long)backref->ref_root,
3315                                 (unsigned long long)backref->dir,
3316                                 (unsigned long long)backref->index,
3317                                 backref->namelen, backref->name,
3318                                 backref->errors);
3319                         print_ref_error(backref->errors);
3320                 }
3321         }
3322         return errors > 0 ? 1 : 0;
3323 }
3324
3325 static int process_root_ref(struct extent_buffer *eb, int slot,
3326                             struct btrfs_key *key,
3327                             struct cache_tree *root_cache)
3328 {
3329         u64 dirid;
3330         u64 index;
3331         u32 len;
3332         u32 name_len;
3333         struct btrfs_root_ref *ref;
3334         char namebuf[BTRFS_NAME_LEN];
3335         int error;
3336
3337         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3338
3339         dirid = btrfs_root_ref_dirid(eb, ref);
3340         index = btrfs_root_ref_sequence(eb, ref);
3341         name_len = btrfs_root_ref_name_len(eb, ref);
3342
3343         if (name_len <= BTRFS_NAME_LEN) {
3344                 len = name_len;
3345                 error = 0;
3346         } else {
3347                 len = BTRFS_NAME_LEN;
3348                 error = REF_ERR_NAME_TOO_LONG;
3349         }
3350         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3351
3352         if (key->type == BTRFS_ROOT_REF_KEY) {
3353                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3354                                  index, namebuf, len, key->type, error);
3355         } else {
3356                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3357                                  index, namebuf, len, key->type, error);
3358         }
3359         return 0;
3360 }
3361
3362 static void free_corrupt_block(struct cache_extent *cache)
3363 {
3364         struct btrfs_corrupt_block *corrupt;
3365
3366         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3367         free(corrupt);
3368 }
3369
3370 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3371
3372 /*
3373  * Repair the btree of the given root.
3374  *
3375  * The fix is to remove the node key in corrupt_blocks cache_tree.
3376  * and rebalance the tree.
3377  * After the fix, the btree should be writeable.
3378  */
3379 static int repair_btree(struct btrfs_root *root,
3380                         struct cache_tree *corrupt_blocks)
3381 {
3382         struct btrfs_trans_handle *trans;
3383         struct btrfs_path *path;
3384         struct btrfs_corrupt_block *corrupt;
3385         struct cache_extent *cache;
3386         struct btrfs_key key;
3387         u64 offset;
3388         int level;
3389         int ret = 0;
3390
3391         if (cache_tree_empty(corrupt_blocks))
3392                 return 0;
3393
3394         path = btrfs_alloc_path();
3395         if (!path)
3396                 return -ENOMEM;
3397
3398         trans = btrfs_start_transaction(root, 1);
3399         if (IS_ERR(trans)) {
3400                 ret = PTR_ERR(trans);
3401                 fprintf(stderr, "Error starting transaction: %s\n",
3402                         strerror(-ret));
3403                 goto out_free_path;
3404         }
3405         cache = first_cache_extent(corrupt_blocks);
3406         while (cache) {
3407                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3408                                        cache);
3409                 level = corrupt->level;
3410                 path->lowest_level = level;
3411                 key.objectid = corrupt->key.objectid;
3412                 key.type = corrupt->key.type;
3413                 key.offset = corrupt->key.offset;
3414
3415                 /*
3416                  * Here we don't want to do any tree balance, since it may
3417                  * cause a balance with corrupted brother leaf/node,
3418                  * so ins_len set to 0 here.
3419                  * Balance will be done after all corrupt node/leaf is deleted.
3420                  */
3421                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3422                 if (ret < 0)
3423                         goto out;
3424                 offset = btrfs_node_blockptr(path->nodes[level],
3425                                              path->slots[level]);
3426
3427                 /* Remove the ptr */
3428                 ret = btrfs_del_ptr(trans, root, path, level,
3429                                     path->slots[level]);
3430                 if (ret < 0)
3431                         goto out;
3432                 /*
3433                  * Remove the corresponding extent
3434                  * return value is not concerned.
3435                  */
3436                 btrfs_release_path(path);
3437                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3438                                         0, root->root_key.objectid,
3439                                         level - 1, 0);
3440                 cache = next_cache_extent(cache);
3441         }
3442
3443         /* Balance the btree using btrfs_search_slot() */
3444         cache = first_cache_extent(corrupt_blocks);
3445         while (cache) {
3446                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3447                                        cache);
3448                 memcpy(&key, &corrupt->key, sizeof(key));
3449                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3450                 if (ret < 0)
3451                         goto out;
3452                 /* return will always >0 since it won't find the item */
3453                 ret = 0;
3454                 btrfs_release_path(path);
3455                 cache = next_cache_extent(cache);
3456         }
3457 out:
3458         btrfs_commit_transaction(trans, root);
3459 out_free_path:
3460         btrfs_free_path(path);
3461         return ret;
3462 }
3463
3464 static int check_fs_root(struct btrfs_root *root,
3465                          struct cache_tree *root_cache,
3466                          struct walk_control *wc)
3467 {
3468         int ret = 0;
3469         int err = 0;
3470         int wret;
3471         int level;
3472         struct btrfs_path path;
3473         struct shared_node root_node;
3474         struct root_record *rec;
3475         struct btrfs_root_item *root_item = &root->root_item;
3476         struct cache_tree corrupt_blocks;
3477         struct orphan_data_extent *orphan;
3478         struct orphan_data_extent *tmp;
3479         enum btrfs_tree_block_status status;
3480
3481         /*
3482          * Reuse the corrupt_block cache tree to record corrupted tree block
3483          *
3484          * Unlike the usage in extent tree check, here we do it in a per
3485          * fs/subvol tree base.
3486          */
3487         cache_tree_init(&corrupt_blocks);
3488         root->fs_info->corrupt_blocks = &corrupt_blocks;
3489
3490         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3491                 rec = get_root_rec(root_cache, root->root_key.objectid);
3492                 BUG_ON(IS_ERR(rec));
3493                 if (btrfs_root_refs(root_item) > 0)
3494                         rec->found_root_item = 1;
3495         }
3496
3497         btrfs_init_path(&path);
3498         memset(&root_node, 0, sizeof(root_node));
3499         cache_tree_init(&root_node.root_cache);
3500         cache_tree_init(&root_node.inode_cache);
3501
3502         /* Move the orphan extent record to corresponding inode_record */
3503         list_for_each_entry_safe(orphan, tmp,
3504                                  &root->orphan_data_extents, list) {
3505                 struct inode_record *inode;
3506
3507                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3508                                       1);
3509                 BUG_ON(IS_ERR(inode));
3510                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3511                 list_move(&orphan->list, &inode->orphan_extents);
3512         }
3513
3514         level = btrfs_header_level(root->node);
3515         memset(wc->nodes, 0, sizeof(wc->nodes));
3516         wc->nodes[level] = &root_node;
3517         wc->active_node = level;
3518         wc->root_level = level;
3519
3520         /* We may not have checked the root block, lets do that now */
3521         if (btrfs_is_leaf(root->node))
3522                 status = btrfs_check_leaf(root, NULL, root->node);
3523         else
3524                 status = btrfs_check_node(root, NULL, root->node);
3525         if (status != BTRFS_TREE_BLOCK_CLEAN)
3526                 return -EIO;
3527
3528         if (btrfs_root_refs(root_item) > 0 ||
3529             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3530                 path.nodes[level] = root->node;
3531                 extent_buffer_get(root->node);
3532                 path.slots[level] = 0;
3533         } else {
3534                 struct btrfs_key key;
3535                 struct btrfs_disk_key found_key;
3536
3537                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3538                 level = root_item->drop_level;
3539                 path.lowest_level = level;
3540                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3541                 if (wret < 0)
3542                         goto skip_walking;
3543                 btrfs_node_key(path.nodes[level], &found_key,
3544                                 path.slots[level]);
3545                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3546                                         sizeof(found_key)));
3547         }
3548
3549         while (1) {
3550                 wret = walk_down_tree(root, &path, wc, &level);
3551                 if (wret < 0)
3552                         ret = wret;
3553                 if (wret != 0)
3554                         break;
3555
3556                 wret = walk_up_tree(root, &path, wc, &level);
3557                 if (wret < 0)
3558                         ret = wret;
3559                 if (wret != 0)
3560                         break;
3561         }
3562 skip_walking:
3563         btrfs_release_path(&path);
3564
3565         if (!cache_tree_empty(&corrupt_blocks)) {
3566                 struct cache_extent *cache;
3567                 struct btrfs_corrupt_block *corrupt;
3568
3569                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3570                        root->root_key.objectid);
3571                 cache = first_cache_extent(&corrupt_blocks);
3572                 while (cache) {
3573                         corrupt = container_of(cache,
3574                                                struct btrfs_corrupt_block,
3575                                                cache);
3576                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3577                                cache->start, corrupt->level,
3578                                corrupt->key.objectid, corrupt->key.type,
3579                                corrupt->key.offset);
3580                         cache = next_cache_extent(cache);
3581                 }
3582                 if (repair) {
3583                         printf("Try to repair the btree for root %llu\n",
3584                                root->root_key.objectid);
3585                         ret = repair_btree(root, &corrupt_blocks);
3586                         if (ret < 0)
3587                                 fprintf(stderr, "Failed to repair btree: %s\n",
3588                                         strerror(-ret));
3589                         if (!ret)
3590                                 printf("Btree for root %llu is fixed\n",
3591                                        root->root_key.objectid);
3592                 }
3593         }
3594
3595         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3596         if (err < 0)
3597                 ret = err;
3598
3599         if (root_node.current) {
3600                 root_node.current->checked = 1;
3601                 maybe_free_inode_rec(&root_node.inode_cache,
3602                                 root_node.current);
3603         }
3604
3605         err = check_inode_recs(root, &root_node.inode_cache);
3606         if (!ret)
3607                 ret = err;
3608
3609         free_corrupt_blocks_tree(&corrupt_blocks);
3610         root->fs_info->corrupt_blocks = NULL;
3611         free_orphan_data_extents(&root->orphan_data_extents);
3612         return ret;
3613 }
3614
3615 static int fs_root_objectid(u64 objectid)
3616 {
3617         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3618             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3619                 return 1;
3620         return is_fstree(objectid);
3621 }
3622
3623 static int check_fs_roots(struct btrfs_root *root,
3624                           struct cache_tree *root_cache)
3625 {
3626         struct btrfs_path path;
3627         struct btrfs_key key;
3628         struct walk_control wc;
3629         struct extent_buffer *leaf, *tree_node;
3630         struct btrfs_root *tmp_root;
3631         struct btrfs_root *tree_root = root->fs_info->tree_root;
3632         int ret;
3633         int err = 0;
3634
3635         if (ctx.progress_enabled) {
3636                 ctx.tp = TASK_FS_ROOTS;
3637                 task_start(ctx.info);
3638         }
3639
3640         /*
3641          * Just in case we made any changes to the extent tree that weren't
3642          * reflected into the free space cache yet.
3643          */
3644         if (repair)
3645                 reset_cached_block_groups(root->fs_info);
3646         memset(&wc, 0, sizeof(wc));
3647         cache_tree_init(&wc.shared);
3648         btrfs_init_path(&path);
3649
3650 again:
3651         key.offset = 0;
3652         key.objectid = 0;
3653         key.type = BTRFS_ROOT_ITEM_KEY;
3654         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3655         if (ret < 0) {
3656                 err = 1;
3657                 goto out;
3658         }
3659         tree_node = tree_root->node;
3660         while (1) {
3661                 if (tree_node != tree_root->node) {
3662                         free_root_recs_tree(root_cache);
3663                         btrfs_release_path(&path);
3664                         goto again;
3665                 }
3666                 leaf = path.nodes[0];
3667                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3668                         ret = btrfs_next_leaf(tree_root, &path);
3669                         if (ret) {
3670                                 if (ret < 0)
3671                                         err = 1;
3672                                 break;
3673                         }
3674                         leaf = path.nodes[0];
3675                 }
3676                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3677                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3678                     fs_root_objectid(key.objectid)) {
3679                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3680                                 tmp_root = btrfs_read_fs_root_no_cache(
3681                                                 root->fs_info, &key);
3682                         } else {
3683                                 key.offset = (u64)-1;
3684                                 tmp_root = btrfs_read_fs_root(
3685                                                 root->fs_info, &key);
3686                         }
3687                         if (IS_ERR(tmp_root)) {
3688                                 err = 1;
3689                                 goto next;
3690                         }
3691                         ret = check_fs_root(tmp_root, root_cache, &wc);
3692                         if (ret == -EAGAIN) {
3693                                 free_root_recs_tree(root_cache);
3694                                 btrfs_release_path(&path);
3695                                 goto again;
3696                         }
3697                         if (ret)
3698                                 err = 1;
3699                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3700                                 btrfs_free_fs_root(tmp_root);
3701                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3702                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3703                         process_root_ref(leaf, path.slots[0], &key,
3704                                          root_cache);
3705                 }
3706 next:
3707                 path.slots[0]++;
3708         }
3709 out:
3710         btrfs_release_path(&path);
3711         if (err)
3712                 free_extent_cache_tree(&wc.shared);
3713         if (!cache_tree_empty(&wc.shared))
3714                 fprintf(stderr, "warning line %d\n", __LINE__);
3715
3716         task_stop(ctx.info);
3717
3718         return err;
3719 }
3720
3721 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3722 {
3723         struct list_head *cur = rec->backrefs.next;
3724         struct extent_backref *back;
3725         struct tree_backref *tback;
3726         struct data_backref *dback;
3727         u64 found = 0;
3728         int err = 0;
3729
3730         while(cur != &rec->backrefs) {
3731                 back = list_entry(cur, struct extent_backref, list);
3732                 cur = cur->next;
3733                 if (!back->found_extent_tree) {
3734                         err = 1;
3735                         if (!print_errs)
3736                                 goto out;
3737                         if (back->is_data) {
3738                                 dback = (struct data_backref *)back;
3739                                 fprintf(stderr, "Backref %llu %s %llu"
3740                                         " owner %llu offset %llu num_refs %lu"
3741                                         " not found in extent tree\n",
3742                                         (unsigned long long)rec->start,
3743                                         back->full_backref ?
3744                                         "parent" : "root",
3745                                         back->full_backref ?
3746                                         (unsigned long long)dback->parent:
3747                                         (unsigned long long)dback->root,
3748                                         (unsigned long long)dback->owner,
3749                                         (unsigned long long)dback->offset,
3750                                         (unsigned long)dback->num_refs);
3751                         } else {
3752                                 tback = (struct tree_backref *)back;
3753                                 fprintf(stderr, "Backref %llu parent %llu"
3754                                         " root %llu not found in extent tree\n",
3755                                         (unsigned long long)rec->start,
3756                                         (unsigned long long)tback->parent,
3757                                         (unsigned long long)tback->root);
3758                         }
3759                 }
3760                 if (!back->is_data && !back->found_ref) {
3761                         err = 1;
3762                         if (!print_errs)
3763                                 goto out;
3764                         tback = (struct tree_backref *)back;
3765                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3766                                 (unsigned long long)rec->start,
3767                                 back->full_backref ? "parent" : "root",
3768                                 back->full_backref ?
3769                                 (unsigned long long)tback->parent :
3770                                 (unsigned long long)tback->root, back);
3771                 }
3772                 if (back->is_data) {
3773                         dback = (struct data_backref *)back;
3774                         if (dback->found_ref != dback->num_refs) {
3775                                 err = 1;
3776                                 if (!print_errs)
3777                                         goto out;
3778                                 fprintf(stderr, "Incorrect local backref count"
3779                                         " on %llu %s %llu owner %llu"
3780                                         " offset %llu found %u wanted %u back %p\n",
3781                                         (unsigned long long)rec->start,
3782                                         back->full_backref ?
3783                                         "parent" : "root",
3784                                         back->full_backref ?
3785                                         (unsigned long long)dback->parent:
3786                                         (unsigned long long)dback->root,
3787                                         (unsigned long long)dback->owner,
3788                                         (unsigned long long)dback->offset,
3789                                         dback->found_ref, dback->num_refs, back);
3790                         }
3791                         if (dback->disk_bytenr != rec->start) {
3792                                 err = 1;
3793                                 if (!print_errs)
3794                                         goto out;
3795                                 fprintf(stderr, "Backref disk bytenr does not"
3796                                         " match extent record, bytenr=%llu, "
3797                                         "ref bytenr=%llu\n",
3798                                         (unsigned long long)rec->start,
3799                                         (unsigned long long)dback->disk_bytenr);
3800                         }
3801
3802                         if (dback->bytes != rec->nr) {
3803                                 err = 1;
3804                                 if (!print_errs)
3805                                         goto out;
3806                                 fprintf(stderr, "Backref bytes do not match "
3807                                         "extent backref, bytenr=%llu, ref "
3808                                         "bytes=%llu, backref bytes=%llu\n",
3809                                         (unsigned long long)rec->start,
3810                                         (unsigned long long)rec->nr,
3811                                         (unsigned long long)dback->bytes);
3812                         }
3813                 }
3814                 if (!back->is_data) {
3815                         found += 1;
3816                 } else {
3817                         dback = (struct data_backref *)back;
3818                         found += dback->found_ref;
3819                 }
3820         }
3821         if (found != rec->refs) {
3822                 err = 1;
3823                 if (!print_errs)
3824                         goto out;
3825                 fprintf(stderr, "Incorrect global backref count "
3826                         "on %llu found %llu wanted %llu\n",
3827                         (unsigned long long)rec->start,
3828                         (unsigned long long)found,
3829                         (unsigned long long)rec->refs);
3830         }
3831 out:
3832         return err;
3833 }
3834
3835 static int free_all_extent_backrefs(struct extent_record *rec)
3836 {
3837         struct extent_backref *back;
3838         struct list_head *cur;
3839         while (!list_empty(&rec->backrefs)) {
3840                 cur = rec->backrefs.next;
3841                 back = list_entry(cur, struct extent_backref, list);
3842                 list_del(cur);
3843                 free(back);
3844         }
3845         return 0;
3846 }
3847
3848 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3849                                      struct cache_tree *extent_cache)
3850 {
3851         struct cache_extent *cache;
3852         struct extent_record *rec;
3853
3854         while (1) {
3855                 cache = first_cache_extent(extent_cache);
3856                 if (!cache)
3857                         break;
3858                 rec = container_of(cache, struct extent_record, cache);
3859                 remove_cache_extent(extent_cache, cache);
3860                 free_all_extent_backrefs(rec);
3861                 free(rec);
3862         }
3863 }
3864
3865 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3866                                  struct extent_record *rec)
3867 {
3868         if (rec->content_checked && rec->owner_ref_checked &&
3869             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3870             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3871             !rec->bad_full_backref && !rec->crossing_stripes &&
3872             !rec->wrong_chunk_type) {
3873                 remove_cache_extent(extent_cache, &rec->cache);
3874                 free_all_extent_backrefs(rec);
3875                 list_del_init(&rec->list);
3876                 free(rec);
3877         }
3878         return 0;
3879 }
3880
3881 static int check_owner_ref(struct btrfs_root *root,
3882                             struct extent_record *rec,
3883                             struct extent_buffer *buf)
3884 {
3885         struct extent_backref *node;
3886         struct tree_backref *back;
3887         struct btrfs_root *ref_root;
3888         struct btrfs_key key;
3889         struct btrfs_path path;
3890         struct extent_buffer *parent;
3891         int level;
3892         int found = 0;
3893         int ret;
3894
3895         list_for_each_entry(node, &rec->backrefs, list) {
3896                 if (node->is_data)
3897                         continue;
3898                 if (!node->found_ref)
3899                         continue;
3900                 if (node->full_backref)
3901                         continue;
3902                 back = (struct tree_backref *)node;
3903                 if (btrfs_header_owner(buf) == back->root)
3904                         return 0;
3905         }
3906         BUG_ON(rec->is_root);
3907
3908         /* try to find the block by search corresponding fs tree */
3909         key.objectid = btrfs_header_owner(buf);
3910         key.type = BTRFS_ROOT_ITEM_KEY;
3911         key.offset = (u64)-1;
3912
3913         ref_root = btrfs_read_fs_root(root->fs_info, &key);
3914         if (IS_ERR(ref_root))
3915                 return 1;
3916
3917         level = btrfs_header_level(buf);
3918         if (level == 0)
3919                 btrfs_item_key_to_cpu(buf, &key, 0);
3920         else
3921                 btrfs_node_key_to_cpu(buf, &key, 0);
3922
3923         btrfs_init_path(&path);
3924         path.lowest_level = level + 1;
3925         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3926         if (ret < 0)
3927                 return 0;
3928
3929         parent = path.nodes[level + 1];
3930         if (parent && buf->start == btrfs_node_blockptr(parent,
3931                                                         path.slots[level + 1]))
3932                 found = 1;
3933
3934         btrfs_release_path(&path);
3935         return found ? 0 : 1;
3936 }
3937
3938 static int is_extent_tree_record(struct extent_record *rec)
3939 {
3940         struct list_head *cur = rec->backrefs.next;
3941         struct extent_backref *node;
3942         struct tree_backref *back;
3943         int is_extent = 0;
3944
3945         while(cur != &rec->backrefs) {
3946                 node = list_entry(cur, struct extent_backref, list);
3947                 cur = cur->next;
3948                 if (node->is_data)
3949                         return 0;
3950                 back = (struct tree_backref *)node;
3951                 if (node->full_backref)
3952                         return 0;
3953                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3954                         is_extent = 1;
3955         }
3956         return is_extent;
3957 }
3958
3959
3960 static int record_bad_block_io(struct btrfs_fs_info *info,
3961                                struct cache_tree *extent_cache,
3962                                u64 start, u64 len)
3963 {
3964         struct extent_record *rec;
3965         struct cache_extent *cache;
3966         struct btrfs_key key;
3967
3968         cache = lookup_cache_extent(extent_cache, start, len);
3969         if (!cache)
3970                 return 0;
3971
3972         rec = container_of(cache, struct extent_record, cache);
3973         if (!is_extent_tree_record(rec))
3974                 return 0;
3975
3976         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3977         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
3978 }
3979
3980 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3981                        struct extent_buffer *buf, int slot)
3982 {
3983         if (btrfs_header_level(buf)) {
3984                 struct btrfs_key_ptr ptr1, ptr2;
3985
3986                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3987                                    sizeof(struct btrfs_key_ptr));
3988                 read_extent_buffer(buf, &ptr2,
3989                                    btrfs_node_key_ptr_offset(slot + 1),
3990                                    sizeof(struct btrfs_key_ptr));
3991                 write_extent_buffer(buf, &ptr1,
3992                                     btrfs_node_key_ptr_offset(slot + 1),
3993                                     sizeof(struct btrfs_key_ptr));
3994                 write_extent_buffer(buf, &ptr2,
3995                                     btrfs_node_key_ptr_offset(slot),
3996                                     sizeof(struct btrfs_key_ptr));
3997                 if (slot == 0) {
3998                         struct btrfs_disk_key key;
3999                         btrfs_node_key(buf, &key, 0);
4000                         btrfs_fixup_low_keys(root, path, &key,
4001                                              btrfs_header_level(buf) + 1);
4002                 }
4003         } else {
4004                 struct btrfs_item *item1, *item2;
4005                 struct btrfs_key k1, k2;
4006                 char *item1_data, *item2_data;
4007                 u32 item1_offset, item2_offset, item1_size, item2_size;
4008
4009                 item1 = btrfs_item_nr(slot);
4010                 item2 = btrfs_item_nr(slot + 1);
4011                 btrfs_item_key_to_cpu(buf, &k1, slot);
4012                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4013                 item1_offset = btrfs_item_offset(buf, item1);
4014                 item2_offset = btrfs_item_offset(buf, item2);
4015                 item1_size = btrfs_item_size(buf, item1);
4016                 item2_size = btrfs_item_size(buf, item2);
4017
4018                 item1_data = malloc(item1_size);
4019                 if (!item1_data)
4020                         return -ENOMEM;
4021                 item2_data = malloc(item2_size);
4022                 if (!item2_data) {
4023                         free(item1_data);
4024                         return -ENOMEM;
4025                 }
4026
4027                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4028                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4029
4030                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4031                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4032                 free(item1_data);
4033                 free(item2_data);
4034
4035                 btrfs_set_item_offset(buf, item1, item2_offset);
4036                 btrfs_set_item_offset(buf, item2, item1_offset);
4037                 btrfs_set_item_size(buf, item1, item2_size);
4038                 btrfs_set_item_size(buf, item2, item1_size);
4039
4040                 path->slots[0] = slot;
4041                 btrfs_set_item_key_unsafe(root, path, &k2);
4042                 path->slots[0] = slot + 1;
4043                 btrfs_set_item_key_unsafe(root, path, &k1);
4044         }
4045         return 0;
4046 }
4047
4048 static int fix_key_order(struct btrfs_trans_handle *trans,
4049                          struct btrfs_root *root,
4050                          struct btrfs_path *path)
4051 {
4052         struct extent_buffer *buf;
4053         struct btrfs_key k1, k2;
4054         int i;
4055         int level = path->lowest_level;
4056         int ret = -EIO;
4057
4058         buf = path->nodes[level];
4059         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4060                 if (level) {
4061                         btrfs_node_key_to_cpu(buf, &k1, i);
4062                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4063                 } else {
4064                         btrfs_item_key_to_cpu(buf, &k1, i);
4065                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4066                 }
4067                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4068                         continue;
4069                 ret = swap_values(root, path, buf, i);
4070                 if (ret)
4071                         break;
4072                 btrfs_mark_buffer_dirty(buf);
4073                 i = 0;
4074         }
4075         return ret;
4076 }
4077
4078 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4079                              struct btrfs_root *root,
4080                              struct btrfs_path *path,
4081                              struct extent_buffer *buf, int slot)
4082 {
4083         struct btrfs_key key;
4084         int nritems = btrfs_header_nritems(buf);
4085
4086         btrfs_item_key_to_cpu(buf, &key, slot);
4087
4088         /* These are all the keys we can deal with missing. */
4089         if (key.type != BTRFS_DIR_INDEX_KEY &&
4090             key.type != BTRFS_EXTENT_ITEM_KEY &&
4091             key.type != BTRFS_METADATA_ITEM_KEY &&
4092             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4093             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4094                 return -1;
4095
4096         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4097                (unsigned long long)key.objectid, key.type,
4098                (unsigned long long)key.offset, slot, buf->start);
4099         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4100                               btrfs_item_nr_offset(slot + 1),
4101                               sizeof(struct btrfs_item) *
4102                               (nritems - slot - 1));
4103         btrfs_set_header_nritems(buf, nritems - 1);
4104         if (slot == 0) {
4105                 struct btrfs_disk_key disk_key;
4106
4107                 btrfs_item_key(buf, &disk_key, 0);
4108                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4109         }
4110         btrfs_mark_buffer_dirty(buf);
4111         return 0;
4112 }
4113
4114 static int fix_item_offset(struct btrfs_trans_handle *trans,
4115                            struct btrfs_root *root,
4116                            struct btrfs_path *path)
4117 {
4118         struct extent_buffer *buf;
4119         int i;
4120         int ret = 0;
4121
4122         /* We should only get this for leaves */
4123         BUG_ON(path->lowest_level);
4124         buf = path->nodes[0];
4125 again:
4126         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4127                 unsigned int shift = 0, offset;
4128
4129                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4130                     BTRFS_LEAF_DATA_SIZE(root)) {
4131                         if (btrfs_item_end_nr(buf, i) >
4132                             BTRFS_LEAF_DATA_SIZE(root)) {
4133                                 ret = delete_bogus_item(trans, root, path,
4134                                                         buf, i);
4135                                 if (!ret)
4136                                         goto again;
4137                                 fprintf(stderr, "item is off the end of the "
4138                                         "leaf, can't fix\n");
4139                                 ret = -EIO;
4140                                 break;
4141                         }
4142                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4143                                 btrfs_item_end_nr(buf, i);
4144                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4145                            btrfs_item_offset_nr(buf, i - 1)) {
4146                         if (btrfs_item_end_nr(buf, i) >
4147                             btrfs_item_offset_nr(buf, i - 1)) {
4148                                 ret = delete_bogus_item(trans, root, path,
4149                                                         buf, i);
4150                                 if (!ret)
4151                                         goto again;
4152                                 fprintf(stderr, "items overlap, can't fix\n");
4153                                 ret = -EIO;
4154                                 break;
4155                         }
4156                         shift = btrfs_item_offset_nr(buf, i - 1) -
4157                                 btrfs_item_end_nr(buf, i);
4158                 }
4159                 if (!shift)
4160                         continue;
4161
4162                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4163                        i, shift, (unsigned long long)buf->start);
4164                 offset = btrfs_item_offset_nr(buf, i);
4165                 memmove_extent_buffer(buf,
4166                                       btrfs_leaf_data(buf) + offset + shift,
4167                                       btrfs_leaf_data(buf) + offset,
4168                                       btrfs_item_size_nr(buf, i));
4169                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4170                                       offset + shift);
4171                 btrfs_mark_buffer_dirty(buf);
4172         }
4173
4174         /*
4175          * We may have moved things, in which case we want to exit so we don't
4176          * write those changes out.  Once we have proper abort functionality in
4177          * progs this can be changed to something nicer.
4178          */
4179         BUG_ON(ret);
4180         return ret;
4181 }
4182
4183 /*
4184  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4185  * then just return -EIO.
4186  */
4187 static int try_to_fix_bad_block(struct btrfs_root *root,
4188                                 struct extent_buffer *buf,
4189                                 enum btrfs_tree_block_status status)
4190 {
4191         struct btrfs_trans_handle *trans;
4192         struct ulist *roots;
4193         struct ulist_node *node;
4194         struct btrfs_root *search_root;
4195         struct btrfs_path *path;
4196         struct ulist_iterator iter;
4197         struct btrfs_key root_key, key;
4198         int ret;
4199
4200         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4201             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4202                 return -EIO;
4203
4204         path = btrfs_alloc_path();
4205         if (!path)
4206                 return -EIO;
4207
4208         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4209                                    0, &roots);
4210         if (ret) {
4211                 btrfs_free_path(path);
4212                 return -EIO;
4213         }
4214
4215         ULIST_ITER_INIT(&iter);
4216         while ((node = ulist_next(roots, &iter))) {
4217                 root_key.objectid = node->val;
4218                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4219                 root_key.offset = (u64)-1;
4220
4221                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4222                 if (IS_ERR(root)) {
4223                         ret = -EIO;
4224                         break;
4225                 }
4226
4227
4228                 trans = btrfs_start_transaction(search_root, 0);
4229                 if (IS_ERR(trans)) {
4230                         ret = PTR_ERR(trans);
4231                         break;
4232                 }
4233
4234                 path->lowest_level = btrfs_header_level(buf);
4235                 path->skip_check_block = 1;
4236                 if (path->lowest_level)
4237                         btrfs_node_key_to_cpu(buf, &key, 0);
4238                 else
4239                         btrfs_item_key_to_cpu(buf, &key, 0);
4240                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4241                 if (ret) {
4242                         ret = -EIO;
4243                         btrfs_commit_transaction(trans, search_root);
4244                         break;
4245                 }
4246                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4247                         ret = fix_key_order(trans, search_root, path);
4248                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4249                         ret = fix_item_offset(trans, search_root, path);
4250                 if (ret) {
4251                         btrfs_commit_transaction(trans, search_root);
4252                         break;
4253                 }
4254                 btrfs_release_path(path);
4255                 btrfs_commit_transaction(trans, search_root);
4256         }
4257         ulist_free(roots);
4258         btrfs_free_path(path);
4259         return ret;
4260 }
4261
4262 static int check_block(struct btrfs_root *root,
4263                        struct cache_tree *extent_cache,
4264                        struct extent_buffer *buf, u64 flags)
4265 {
4266         struct extent_record *rec;
4267         struct cache_extent *cache;
4268         struct btrfs_key key;
4269         enum btrfs_tree_block_status status;
4270         int ret = 0;
4271         int level;
4272
4273         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4274         if (!cache)
4275                 return 1;
4276         rec = container_of(cache, struct extent_record, cache);
4277         rec->generation = btrfs_header_generation(buf);
4278
4279         level = btrfs_header_level(buf);
4280         if (btrfs_header_nritems(buf) > 0) {
4281
4282                 if (level == 0)
4283                         btrfs_item_key_to_cpu(buf, &key, 0);
4284                 else
4285                         btrfs_node_key_to_cpu(buf, &key, 0);
4286
4287                 rec->info_objectid = key.objectid;
4288         }
4289         rec->info_level = level;
4290
4291         if (btrfs_is_leaf(buf))
4292                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4293         else
4294                 status = btrfs_check_node(root, &rec->parent_key, buf);
4295
4296         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4297                 if (repair)
4298                         status = try_to_fix_bad_block(root, buf, status);
4299                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4300                         ret = -EIO;
4301                         fprintf(stderr, "bad block %llu\n",
4302                                 (unsigned long long)buf->start);
4303                 } else {
4304                         /*
4305                          * Signal to callers we need to start the scan over
4306                          * again since we'll have cow'ed blocks.
4307                          */
4308                         ret = -EAGAIN;
4309                 }
4310         } else {
4311                 rec->content_checked = 1;
4312                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4313                         rec->owner_ref_checked = 1;
4314                 else {
4315                         ret = check_owner_ref(root, rec, buf);
4316                         if (!ret)
4317                                 rec->owner_ref_checked = 1;
4318                 }
4319         }
4320         if (!ret)
4321                 maybe_free_extent_rec(extent_cache, rec);
4322         return ret;
4323 }
4324
4325 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4326                                                 u64 parent, u64 root)
4327 {
4328         struct list_head *cur = rec->backrefs.next;
4329         struct extent_backref *node;
4330         struct tree_backref *back;
4331
4332         while(cur != &rec->backrefs) {
4333                 node = list_entry(cur, struct extent_backref, list);
4334                 cur = cur->next;
4335                 if (node->is_data)
4336                         continue;
4337                 back = (struct tree_backref *)node;
4338                 if (parent > 0) {
4339                         if (!node->full_backref)
4340                                 continue;
4341                         if (parent == back->parent)
4342                                 return back;
4343                 } else {
4344                         if (node->full_backref)
4345                                 continue;
4346                         if (back->root == root)
4347                                 return back;
4348                 }
4349         }
4350         return NULL;
4351 }
4352
4353 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4354                                                 u64 parent, u64 root)
4355 {
4356         struct tree_backref *ref = malloc(sizeof(*ref));
4357
4358         if (!ref)
4359                 return NULL;
4360         memset(&ref->node, 0, sizeof(ref->node));
4361         if (parent > 0) {
4362                 ref->parent = parent;
4363                 ref->node.full_backref = 1;
4364         } else {
4365                 ref->root = root;
4366                 ref->node.full_backref = 0;
4367         }
4368         list_add_tail(&ref->node.list, &rec->backrefs);
4369
4370         return ref;
4371 }
4372
4373 static struct data_backref *find_data_backref(struct extent_record *rec,
4374                                                 u64 parent, u64 root,
4375                                                 u64 owner, u64 offset,
4376                                                 int found_ref,
4377                                                 u64 disk_bytenr, u64 bytes)
4378 {
4379         struct list_head *cur = rec->backrefs.next;
4380         struct extent_backref *node;
4381         struct data_backref *back;
4382
4383         while(cur != &rec->backrefs) {
4384                 node = list_entry(cur, struct extent_backref, list);
4385                 cur = cur->next;
4386                 if (!node->is_data)
4387                         continue;
4388                 back = (struct data_backref *)node;
4389                 if (parent > 0) {
4390                         if (!node->full_backref)
4391                                 continue;
4392                         if (parent == back->parent)
4393                                 return back;
4394                 } else {
4395                         if (node->full_backref)
4396                                 continue;
4397                         if (back->root == root && back->owner == owner &&
4398                             back->offset == offset) {
4399                                 if (found_ref && node->found_ref &&
4400                                     (back->bytes != bytes ||
4401                                     back->disk_bytenr != disk_bytenr))
4402                                         continue;
4403                                 return back;
4404                         }
4405                 }
4406         }
4407         return NULL;
4408 }
4409
4410 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4411                                                 u64 parent, u64 root,
4412                                                 u64 owner, u64 offset,
4413                                                 u64 max_size)
4414 {
4415         struct data_backref *ref = malloc(sizeof(*ref));
4416
4417         if (!ref)
4418                 return NULL;
4419         memset(&ref->node, 0, sizeof(ref->node));
4420         ref->node.is_data = 1;
4421
4422         if (parent > 0) {
4423                 ref->parent = parent;
4424                 ref->owner = 0;
4425                 ref->offset = 0;
4426                 ref->node.full_backref = 1;
4427         } else {
4428                 ref->root = root;
4429                 ref->owner = owner;
4430                 ref->offset = offset;
4431                 ref->node.full_backref = 0;
4432         }
4433         ref->bytes = max_size;
4434         ref->found_ref = 0;
4435         ref->num_refs = 0;
4436         list_add_tail(&ref->node.list, &rec->backrefs);
4437         if (max_size > rec->max_size)
4438                 rec->max_size = max_size;
4439         return ref;
4440 }
4441
4442 /* Check if the type of extent matches with its chunk */
4443 static void check_extent_type(struct extent_record *rec)
4444 {
4445         struct btrfs_block_group_cache *bg_cache;
4446
4447         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4448         if (!bg_cache)
4449                 return;
4450
4451         /* data extent, check chunk directly*/
4452         if (!rec->metadata) {
4453                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4454                         rec->wrong_chunk_type = 1;
4455                 return;
4456         }
4457
4458         /* metadata extent, check the obvious case first */
4459         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4460                                  BTRFS_BLOCK_GROUP_METADATA))) {
4461                 rec->wrong_chunk_type = 1;
4462                 return;
4463         }
4464
4465         /*
4466          * Check SYSTEM extent, as it's also marked as metadata, we can only
4467          * make sure it's a SYSTEM extent by its backref
4468          */
4469         if (!list_empty(&rec->backrefs)) {
4470                 struct extent_backref *node;
4471                 struct tree_backref *tback;
4472                 u64 bg_type;
4473
4474                 node = list_entry(rec->backrefs.next, struct extent_backref,
4475                                   list);
4476                 if (node->is_data) {
4477                         /* tree block shouldn't have data backref */
4478                         rec->wrong_chunk_type = 1;
4479                         return;
4480                 }
4481                 tback = container_of(node, struct tree_backref, node);
4482
4483                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4484                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4485                 else
4486                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4487                 if (!(bg_cache->flags & bg_type))
4488                         rec->wrong_chunk_type = 1;
4489         }
4490 }
4491
4492 static int add_extent_rec(struct cache_tree *extent_cache,
4493                           struct btrfs_key *parent_key, u64 parent_gen,
4494                           u64 start, u64 nr, u64 extent_item_refs,
4495                           int is_root, int inc_ref, int set_checked,
4496                           int metadata, int extent_rec, u64 max_size)
4497 {
4498         struct extent_record *rec;
4499         struct cache_extent *cache;
4500         int ret = 0;
4501         int dup = 0;
4502
4503         cache = lookup_cache_extent(extent_cache, start, nr);
4504         if (cache) {
4505                 rec = container_of(cache, struct extent_record, cache);
4506                 if (inc_ref)
4507                         rec->refs++;
4508                 if (rec->nr == 1)
4509                         rec->nr = max(nr, max_size);
4510
4511                 /*
4512                  * We need to make sure to reset nr to whatever the extent
4513                  * record says was the real size, this way we can compare it to
4514                  * the backrefs.
4515                  */
4516                 if (extent_rec) {
4517                         if (start != rec->start || rec->found_rec) {
4518                                 struct extent_record *tmp;
4519
4520                                 dup = 1;
4521                                 if (list_empty(&rec->list))
4522                                         list_add_tail(&rec->list,
4523                                                       &duplicate_extents);
4524
4525                                 /*
4526                                  * We have to do this song and dance in case we
4527                                  * find an extent record that falls inside of
4528                                  * our current extent record but does not have
4529                                  * the same objectid.
4530                                  */
4531                                 tmp = malloc(sizeof(*tmp));
4532                                 if (!tmp)
4533                                         return -ENOMEM;
4534                                 tmp->start = start;
4535                                 tmp->max_size = max_size;
4536                                 tmp->nr = nr;
4537                                 tmp->found_rec = 1;
4538                                 tmp->metadata = metadata;
4539                                 tmp->extent_item_refs = extent_item_refs;
4540                                 INIT_LIST_HEAD(&tmp->list);
4541                                 list_add_tail(&tmp->list, &rec->dups);
4542                                 rec->num_duplicates++;
4543                         } else {
4544                                 rec->nr = nr;
4545                                 rec->found_rec = 1;
4546                         }
4547                 }
4548
4549                 if (extent_item_refs && !dup) {
4550                         if (rec->extent_item_refs) {
4551                                 fprintf(stderr, "block %llu rec "
4552                                         "extent_item_refs %llu, passed %llu\n",
4553                                         (unsigned long long)start,
4554                                         (unsigned long long)
4555                                                         rec->extent_item_refs,
4556                                         (unsigned long long)extent_item_refs);
4557                         }
4558                         rec->extent_item_refs = extent_item_refs;
4559                 }
4560                 if (is_root)
4561                         rec->is_root = 1;
4562                 if (set_checked) {
4563                         rec->content_checked = 1;
4564                         rec->owner_ref_checked = 1;
4565                 }
4566
4567                 if (parent_key)
4568                         btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4569                 if (parent_gen)
4570                         rec->parent_generation = parent_gen;
4571
4572                 if (rec->max_size < max_size)
4573                         rec->max_size = max_size;
4574
4575                 /*
4576                  * A metadata extent can't cross stripe_len boundary, otherwise
4577                  * kernel scrub won't be able to handle it.
4578                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4579                  * it.
4580                  */
4581                 if (metadata && check_crossing_stripes(rec->start,
4582                                                        rec->max_size))
4583                                 rec->crossing_stripes = 1;
4584                 check_extent_type(rec);
4585                 maybe_free_extent_rec(extent_cache, rec);
4586                 return ret;
4587         }
4588         rec = malloc(sizeof(*rec));
4589         if (!rec)
4590                 return -ENOMEM;
4591         rec->start = start;
4592         rec->max_size = max_size;
4593         rec->nr = max(nr, max_size);
4594         rec->found_rec = !!extent_rec;
4595         rec->content_checked = 0;
4596         rec->owner_ref_checked = 0;
4597         rec->num_duplicates = 0;
4598         rec->metadata = metadata;
4599         rec->flag_block_full_backref = -1;
4600         rec->bad_full_backref = 0;
4601         rec->crossing_stripes = 0;
4602         rec->wrong_chunk_type = 0;
4603         INIT_LIST_HEAD(&rec->backrefs);
4604         INIT_LIST_HEAD(&rec->dups);
4605         INIT_LIST_HEAD(&rec->list);
4606
4607         if (is_root)
4608                 rec->is_root = 1;
4609         else
4610                 rec->is_root = 0;
4611
4612         if (inc_ref)
4613                 rec->refs = 1;
4614         else
4615                 rec->refs = 0;
4616
4617         if (extent_item_refs)
4618                 rec->extent_item_refs = extent_item_refs;
4619         else
4620                 rec->extent_item_refs = 0;
4621
4622         if (parent_key)
4623                 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4624         else
4625                 memset(&rec->parent_key, 0, sizeof(*parent_key));
4626
4627         if (parent_gen)
4628                 rec->parent_generation = parent_gen;
4629         else
4630                 rec->parent_generation = 0;
4631
4632         rec->cache.start = start;
4633         rec->cache.size = nr;
4634         ret = insert_cache_extent(extent_cache, &rec->cache);
4635         BUG_ON(ret);
4636         bytes_used += nr;
4637         if (set_checked) {
4638                 rec->content_checked = 1;
4639                 rec->owner_ref_checked = 1;
4640         }
4641
4642         if (metadata)
4643                 if (check_crossing_stripes(rec->start, rec->max_size))
4644                         rec->crossing_stripes = 1;
4645         check_extent_type(rec);
4646         return ret;
4647 }
4648
4649 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4650                             u64 parent, u64 root, int found_ref)
4651 {
4652         struct extent_record *rec;
4653         struct tree_backref *back;
4654         struct cache_extent *cache;
4655
4656         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4657         if (!cache) {
4658                 add_extent_rec(extent_cache, NULL, 0, bytenr,
4659                                1, 0, 0, 0, 0, 1, 0, 0);
4660                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4661                 if (!cache)
4662                         abort();
4663         }
4664
4665         rec = container_of(cache, struct extent_record, cache);
4666         if (rec->start != bytenr) {
4667                 abort();
4668         }
4669
4670         back = find_tree_backref(rec, parent, root);
4671         if (!back) {
4672                 back = alloc_tree_backref(rec, parent, root);
4673                 BUG_ON(!back);
4674         }
4675
4676         if (found_ref) {
4677                 if (back->node.found_ref) {
4678                         fprintf(stderr, "Extent back ref already exists "
4679                                 "for %llu parent %llu root %llu \n",
4680                                 (unsigned long long)bytenr,
4681                                 (unsigned long long)parent,
4682                                 (unsigned long long)root);
4683                 }
4684                 back->node.found_ref = 1;
4685         } else {
4686                 if (back->node.found_extent_tree) {
4687                         fprintf(stderr, "Extent back ref already exists "
4688                                 "for %llu parent %llu root %llu \n",
4689                                 (unsigned long long)bytenr,
4690                                 (unsigned long long)parent,
4691                                 (unsigned long long)root);
4692                 }
4693                 back->node.found_extent_tree = 1;
4694         }
4695         check_extent_type(rec);
4696         maybe_free_extent_rec(extent_cache, rec);
4697         return 0;
4698 }
4699
4700 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4701                             u64 parent, u64 root, u64 owner, u64 offset,
4702                             u32 num_refs, int found_ref, u64 max_size)
4703 {
4704         struct extent_record *rec;
4705         struct data_backref *back;
4706         struct cache_extent *cache;
4707
4708         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4709         if (!cache) {
4710                 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
4711                                0, 0, max_size);
4712                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4713                 if (!cache)
4714                         abort();
4715         }
4716
4717         rec = container_of(cache, struct extent_record, cache);
4718         if (rec->max_size < max_size)
4719                 rec->max_size = max_size;
4720
4721         /*
4722          * If found_ref is set then max_size is the real size and must match the
4723          * existing refs.  So if we have already found a ref then we need to
4724          * make sure that this ref matches the existing one, otherwise we need
4725          * to add a new backref so we can notice that the backrefs don't match
4726          * and we need to figure out who is telling the truth.  This is to
4727          * account for that awful fsync bug I introduced where we'd end up with
4728          * a btrfs_file_extent_item that would have its length include multiple
4729          * prealloc extents or point inside of a prealloc extent.
4730          */
4731         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4732                                  bytenr, max_size);
4733         if (!back) {
4734                 back = alloc_data_backref(rec, parent, root, owner, offset,
4735                                           max_size);
4736                 BUG_ON(!back);
4737         }
4738
4739         if (found_ref) {
4740                 BUG_ON(num_refs != 1);
4741                 if (back->node.found_ref)
4742                         BUG_ON(back->bytes != max_size);
4743                 back->node.found_ref = 1;
4744                 back->found_ref += 1;
4745                 back->bytes = max_size;
4746                 back->disk_bytenr = bytenr;
4747                 rec->refs += 1;
4748                 rec->content_checked = 1;
4749                 rec->owner_ref_checked = 1;
4750         } else {
4751                 if (back->node.found_extent_tree) {
4752                         fprintf(stderr, "Extent back ref already exists "
4753                                 "for %llu parent %llu root %llu "
4754                                 "owner %llu offset %llu num_refs %lu\n",
4755                                 (unsigned long long)bytenr,
4756                                 (unsigned long long)parent,
4757                                 (unsigned long long)root,
4758                                 (unsigned long long)owner,
4759                                 (unsigned long long)offset,
4760                                 (unsigned long)num_refs);
4761                 }
4762                 back->num_refs = num_refs;
4763                 back->node.found_extent_tree = 1;
4764         }
4765         maybe_free_extent_rec(extent_cache, rec);
4766         return 0;
4767 }
4768
4769 static int add_pending(struct cache_tree *pending,
4770                        struct cache_tree *seen, u64 bytenr, u32 size)
4771 {
4772         int ret;
4773         ret = add_cache_extent(seen, bytenr, size);
4774         if (ret)
4775                 return ret;
4776         add_cache_extent(pending, bytenr, size);
4777         return 0;
4778 }
4779
4780 static int pick_next_pending(struct cache_tree *pending,
4781                         struct cache_tree *reada,
4782                         struct cache_tree *nodes,
4783                         u64 last, struct block_info *bits, int bits_nr,
4784                         int *reada_bits)
4785 {
4786         unsigned long node_start = last;
4787         struct cache_extent *cache;
4788         int ret;
4789
4790         cache = search_cache_extent(reada, 0);
4791         if (cache) {
4792                 bits[0].start = cache->start;
4793                 bits[0].size = cache->size;
4794                 *reada_bits = 1;
4795                 return 1;
4796         }
4797         *reada_bits = 0;
4798         if (node_start > 32768)
4799                 node_start -= 32768;
4800
4801         cache = search_cache_extent(nodes, node_start);
4802         if (!cache)
4803                 cache = search_cache_extent(nodes, 0);
4804
4805         if (!cache) {
4806                  cache = search_cache_extent(pending, 0);
4807                  if (!cache)
4808                          return 0;
4809                  ret = 0;
4810                  do {
4811                          bits[ret].start = cache->start;
4812                          bits[ret].size = cache->size;
4813                          cache = next_cache_extent(cache);
4814                          ret++;
4815                  } while (cache && ret < bits_nr);
4816                  return ret;
4817         }
4818
4819         ret = 0;
4820         do {
4821                 bits[ret].start = cache->start;
4822                 bits[ret].size = cache->size;
4823                 cache = next_cache_extent(cache);
4824                 ret++;
4825         } while (cache && ret < bits_nr);
4826
4827         if (bits_nr - ret > 8) {
4828                 u64 lookup = bits[0].start + bits[0].size;
4829                 struct cache_extent *next;
4830                 next = search_cache_extent(pending, lookup);
4831                 while(next) {
4832                         if (next->start - lookup > 32768)
4833                                 break;
4834                         bits[ret].start = next->start;
4835                         bits[ret].size = next->size;
4836                         lookup = next->start + next->size;
4837                         ret++;
4838                         if (ret == bits_nr)
4839                                 break;
4840                         next = next_cache_extent(next);
4841                         if (!next)
4842                                 break;
4843                 }
4844         }
4845         return ret;
4846 }
4847
4848 static void free_chunk_record(struct cache_extent *cache)
4849 {
4850         struct chunk_record *rec;
4851
4852         rec = container_of(cache, struct chunk_record, cache);
4853         list_del_init(&rec->list);
4854         list_del_init(&rec->dextents);
4855         free(rec);
4856 }
4857
4858 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4859 {
4860         cache_tree_free_extents(chunk_cache, free_chunk_record);
4861 }
4862
4863 static void free_device_record(struct rb_node *node)
4864 {
4865         struct device_record *rec;
4866
4867         rec = container_of(node, struct device_record, node);
4868         free(rec);
4869 }
4870
4871 FREE_RB_BASED_TREE(device_cache, free_device_record);
4872
4873 int insert_block_group_record(struct block_group_tree *tree,
4874                               struct block_group_record *bg_rec)
4875 {
4876         int ret;
4877
4878         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4879         if (ret)
4880                 return ret;
4881
4882         list_add_tail(&bg_rec->list, &tree->block_groups);
4883         return 0;
4884 }
4885
4886 static void free_block_group_record(struct cache_extent *cache)
4887 {
4888         struct block_group_record *rec;
4889
4890         rec = container_of(cache, struct block_group_record, cache);
4891         list_del_init(&rec->list);
4892         free(rec);
4893 }
4894
4895 void free_block_group_tree(struct block_group_tree *tree)
4896 {
4897         cache_tree_free_extents(&tree->tree, free_block_group_record);
4898 }
4899
4900 int insert_device_extent_record(struct device_extent_tree *tree,
4901                                 struct device_extent_record *de_rec)
4902 {
4903         int ret;
4904
4905         /*
4906          * Device extent is a bit different from the other extents, because
4907          * the extents which belong to the different devices may have the
4908          * same start and size, so we need use the special extent cache
4909          * search/insert functions.
4910          */
4911         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4912         if (ret)
4913                 return ret;
4914
4915         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4916         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4917         return 0;
4918 }
4919
4920 static void free_device_extent_record(struct cache_extent *cache)
4921 {
4922         struct device_extent_record *rec;
4923
4924         rec = container_of(cache, struct device_extent_record, cache);
4925         if (!list_empty(&rec->chunk_list))
4926                 list_del_init(&rec->chunk_list);
4927         if (!list_empty(&rec->device_list))
4928                 list_del_init(&rec->device_list);
4929         free(rec);
4930 }
4931
4932 void free_device_extent_tree(struct device_extent_tree *tree)
4933 {
4934         cache_tree_free_extents(&tree->tree, free_device_extent_record);
4935 }
4936
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent reference item at @slot of @leaf into a backref entry
 * in @extent_cache.
 *
 * References whose objectid is below BTRFS_FIRST_FREE_OBJECTID are recorded
 * as tree backrefs, all others as data backrefs carrying the item's
 * reference count.
 *
 * Returns the result of the backref helper that was called, so a failure
 * there is no longer hidden behind an unconditional 0.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid,
					key.offset, 0, 0);

	return add_data_backref(extent_cache, key.objectid, key.offset, 0,
				0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
4955
4956 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4957                                             struct btrfs_key *key,
4958                                             int slot)
4959 {
4960         struct btrfs_chunk *ptr;
4961         struct chunk_record *rec;
4962         int num_stripes, i;
4963
4964         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4965         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4966
4967         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4968         if (!rec) {
4969                 fprintf(stderr, "memory allocation failed\n");
4970                 exit(-1);
4971         }
4972
4973         INIT_LIST_HEAD(&rec->list);
4974         INIT_LIST_HEAD(&rec->dextents);
4975         rec->bg_rec = NULL;
4976
4977         rec->cache.start = key->offset;
4978         rec->cache.size = btrfs_chunk_length(leaf, ptr);
4979
4980         rec->generation = btrfs_header_generation(leaf);
4981
4982         rec->objectid = key->objectid;
4983         rec->type = key->type;
4984         rec->offset = key->offset;
4985
4986         rec->length = rec->cache.size;
4987         rec->owner = btrfs_chunk_owner(leaf, ptr);
4988         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4989         rec->type_flags = btrfs_chunk_type(leaf, ptr);
4990         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4991         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4992         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4993         rec->num_stripes = num_stripes;
4994         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4995
4996         for (i = 0; i < rec->num_stripes; ++i) {
4997                 rec->stripes[i].devid =
4998                         btrfs_stripe_devid_nr(leaf, ptr, i);
4999                 rec->stripes[i].offset =
5000                         btrfs_stripe_offset_nr(leaf, ptr, i);
5001                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5002                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5003                                 BTRFS_UUID_SIZE);
5004         }
5005
5006         return rec;
5007 }
5008
5009 static int process_chunk_item(struct cache_tree *chunk_cache,
5010                               struct btrfs_key *key, struct extent_buffer *eb,
5011                               int slot)
5012 {
5013         struct chunk_record *rec;
5014         int ret = 0;
5015
5016         rec = btrfs_new_chunk_record(eb, key, slot);
5017         ret = insert_cache_extent(chunk_cache, &rec->cache);
5018         if (ret) {
5019                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5020                         rec->offset, rec->length);
5021                 free(rec);
5022         }
5023
5024         return ret;
5025 }
5026
5027 static int process_device_item(struct rb_root *dev_cache,
5028                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5029 {
5030         struct btrfs_dev_item *ptr;
5031         struct device_record *rec;
5032         int ret = 0;
5033
5034         ptr = btrfs_item_ptr(eb,
5035                 slot, struct btrfs_dev_item);
5036
5037         rec = malloc(sizeof(*rec));
5038         if (!rec) {
5039                 fprintf(stderr, "memory allocation failed\n");
5040                 return -ENOMEM;
5041         }
5042
5043         rec->devid = key->offset;
5044         rec->generation = btrfs_header_generation(eb);
5045
5046         rec->objectid = key->objectid;
5047         rec->type = key->type;
5048         rec->offset = key->offset;
5049
5050         rec->devid = btrfs_device_id(eb, ptr);
5051         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5052         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5053
5054         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5055         if (ret) {
5056                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5057                 free(rec);
5058         }
5059
5060         return ret;
5061 }
5062
5063 struct block_group_record *
5064 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5065                              int slot)
5066 {
5067         struct btrfs_block_group_item *ptr;
5068         struct block_group_record *rec;
5069
5070         rec = calloc(1, sizeof(*rec));
5071         if (!rec) {
5072                 fprintf(stderr, "memory allocation failed\n");
5073                 exit(-1);
5074         }
5075
5076         rec->cache.start = key->objectid;
5077         rec->cache.size = key->offset;
5078
5079         rec->generation = btrfs_header_generation(leaf);
5080
5081         rec->objectid = key->objectid;
5082         rec->type = key->type;
5083         rec->offset = key->offset;
5084
5085         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5086         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5087
5088         INIT_LIST_HEAD(&rec->list);
5089
5090         return rec;
5091 }
5092
5093 static int process_block_group_item(struct block_group_tree *block_group_cache,
5094                                     struct btrfs_key *key,
5095                                     struct extent_buffer *eb, int slot)
5096 {
5097         struct block_group_record *rec;
5098         int ret = 0;
5099
5100         rec = btrfs_new_block_group_record(eb, key, slot);
5101         ret = insert_block_group_record(block_group_cache, rec);
5102         if (ret) {
5103                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5104                         rec->objectid, rec->offset);
5105                 free(rec);
5106         }
5107
5108         return ret;
5109 }
5110
5111 struct device_extent_record *
5112 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5113                                struct btrfs_key *key, int slot)
5114 {
5115         struct device_extent_record *rec;
5116         struct btrfs_dev_extent *ptr;
5117
5118         rec = calloc(1, sizeof(*rec));
5119         if (!rec) {
5120                 fprintf(stderr, "memory allocation failed\n");
5121                 exit(-1);
5122         }
5123
5124         rec->cache.objectid = key->objectid;
5125         rec->cache.start = key->offset;
5126
5127         rec->generation = btrfs_header_generation(leaf);
5128
5129         rec->objectid = key->objectid;
5130         rec->type = key->type;
5131         rec->offset = key->offset;
5132
5133         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5134         rec->chunk_objecteid =
5135                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5136         rec->chunk_offset =
5137                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5138         rec->length = btrfs_dev_extent_length(leaf, ptr);
5139         rec->cache.size = rec->length;
5140
5141         INIT_LIST_HEAD(&rec->chunk_list);
5142         INIT_LIST_HEAD(&rec->device_list);
5143
5144         return rec;
5145 }
5146
5147 static int
5148 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5149                            struct btrfs_key *key, struct extent_buffer *eb,
5150                            int slot)
5151 {
5152         struct device_extent_record *rec;
5153         int ret;
5154
5155         rec = btrfs_new_device_extent_record(eb, key, slot);
5156         ret = insert_device_extent_record(dev_extent_cache, rec);
5157         if (ret) {
5158                 fprintf(stderr,
5159                         "Device extent[%llu, %llu, %llu] existed.\n",
5160                         rec->objectid, rec->offset, rec->length);
5161                 free(rec);
5162         }
5163
5164         return ret;
5165 }
5166
5167 static int process_extent_item(struct btrfs_root *root,
5168                                struct cache_tree *extent_cache,
5169                                struct extent_buffer *eb, int slot)
5170 {
5171         struct btrfs_extent_item *ei;
5172         struct btrfs_extent_inline_ref *iref;
5173         struct btrfs_extent_data_ref *dref;
5174         struct btrfs_shared_data_ref *sref;
5175         struct btrfs_key key;
5176         unsigned long end;
5177         unsigned long ptr;
5178         int type;
5179         u32 item_size = btrfs_item_size_nr(eb, slot);
5180         u64 refs = 0;
5181         u64 offset;
5182         u64 num_bytes;
5183         int metadata = 0;
5184
5185         btrfs_item_key_to_cpu(eb, &key, slot);
5186
5187         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5188                 metadata = 1;
5189                 num_bytes = root->leafsize;
5190         } else {
5191                 num_bytes = key.offset;
5192         }
5193
5194         if (item_size < sizeof(*ei)) {
5195 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5196                 struct btrfs_extent_item_v0 *ei0;
5197                 BUG_ON(item_size != sizeof(*ei0));
5198                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5199                 refs = btrfs_extent_refs_v0(eb, ei0);
5200 #else
5201                 BUG();
5202 #endif
5203                 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5204                                       num_bytes, refs, 0, 0, 0, metadata, 1,
5205                                       num_bytes);
5206         }
5207
5208         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5209         refs = btrfs_extent_refs(eb, ei);
5210         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5211                 metadata = 1;
5212         else
5213                 metadata = 0;
5214
5215         add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5216                        refs, 0, 0, 0, metadata, 1, num_bytes);
5217
5218         ptr = (unsigned long)(ei + 1);
5219         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5220             key.type == BTRFS_EXTENT_ITEM_KEY)
5221                 ptr += sizeof(struct btrfs_tree_block_info);
5222
5223         end = (unsigned long)ei + item_size;
5224         while (ptr < end) {
5225                 iref = (struct btrfs_extent_inline_ref *)ptr;
5226                 type = btrfs_extent_inline_ref_type(eb, iref);
5227                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5228                 switch (type) {
5229                 case BTRFS_TREE_BLOCK_REF_KEY:
5230                         add_tree_backref(extent_cache, key.objectid,
5231                                          0, offset, 0);
5232                         break;
5233                 case BTRFS_SHARED_BLOCK_REF_KEY:
5234                         add_tree_backref(extent_cache, key.objectid,
5235                                          offset, 0, 0);
5236                         break;
5237                 case BTRFS_EXTENT_DATA_REF_KEY:
5238                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5239                         add_data_backref(extent_cache, key.objectid, 0,
5240                                         btrfs_extent_data_ref_root(eb, dref),
5241                                         btrfs_extent_data_ref_objectid(eb,
5242                                                                        dref),
5243                                         btrfs_extent_data_ref_offset(eb, dref),
5244                                         btrfs_extent_data_ref_count(eb, dref),
5245                                         0, num_bytes);
5246                         break;
5247                 case BTRFS_SHARED_DATA_REF_KEY:
5248                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5249                         add_data_backref(extent_cache, key.objectid, offset,
5250                                         0, 0, 0,
5251                                         btrfs_shared_data_ref_count(eb, sref),
5252                                         0, num_bytes);
5253                         break;
5254                 default:
5255                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5256                                 key.objectid, key.type, num_bytes);
5257                         goto out;
5258                 }
5259                 ptr += btrfs_extent_inline_ref_size(type);
5260         }
5261         WARN_ON(ptr > end);
5262 out:
5263         return 0;
5264 }
5265
5266 static int check_cache_range(struct btrfs_root *root,
5267                              struct btrfs_block_group_cache *cache,
5268                              u64 offset, u64 bytes)
5269 {
5270         struct btrfs_free_space *entry;
5271         u64 *logical;
5272         u64 bytenr;
5273         int stripe_len;
5274         int i, nr, ret;
5275
5276         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5277                 bytenr = btrfs_sb_offset(i);
5278                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5279                                        cache->key.objectid, bytenr, 0,
5280                                        &logical, &nr, &stripe_len);
5281                 if (ret)
5282                         return ret;
5283
5284                 while (nr--) {
5285                         if (logical[nr] + stripe_len <= offset)
5286                                 continue;
5287                         if (offset + bytes <= logical[nr])
5288                                 continue;
5289                         if (logical[nr] == offset) {
5290                                 if (stripe_len >= bytes) {
5291                                         kfree(logical);
5292                                         return 0;
5293                                 }
5294                                 bytes -= stripe_len;
5295                                 offset += stripe_len;
5296                         } else if (logical[nr] < offset) {
5297                                 if (logical[nr] + stripe_len >=
5298                                     offset + bytes) {
5299                                         kfree(logical);
5300                                         return 0;
5301                                 }
5302                                 bytes = (offset + bytes) -
5303                                         (logical[nr] + stripe_len);
5304                                 offset = logical[nr] + stripe_len;
5305                         } else {
5306                                 /*
5307                                  * Could be tricky, the super may land in the
5308                                  * middle of the area we're checking.  First
5309                                  * check the easiest case, it's at the end.
5310                                  */
5311                                 if (logical[nr] + stripe_len >=
5312                                     bytes + offset) {
5313                                         bytes = logical[nr] - offset;
5314                                         continue;
5315                                 }
5316
5317                                 /* Check the left side */
5318                                 ret = check_cache_range(root, cache,
5319                                                         offset,
5320                                                         logical[nr] - offset);
5321                                 if (ret) {
5322                                         kfree(logical);
5323                                         return ret;
5324                                 }
5325
5326                                 /* Now we continue with the right side */
5327                                 bytes = (offset + bytes) -
5328                                         (logical[nr] + stripe_len);
5329                                 offset = logical[nr] + stripe_len;
5330                         }
5331                 }
5332
5333                 kfree(logical);
5334         }
5335
5336         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5337         if (!entry) {
5338                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5339                         offset, offset+bytes);
5340                 return -EINVAL;
5341         }
5342
5343         if (entry->offset != offset) {
5344                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5345                         entry->offset);
5346                 return -EINVAL;
5347         }
5348
5349         if (entry->bytes != bytes) {
5350                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5351                         bytes, entry->bytes, offset);
5352                 return -EINVAL;
5353         }
5354
5355         unlink_free_space(cache->free_space_ctl, entry);
5356         free(entry);
5357         return 0;
5358 }
5359
5360 static int verify_space_cache(struct btrfs_root *root,
5361                               struct btrfs_block_group_cache *cache)
5362 {
5363         struct btrfs_path *path;
5364         struct extent_buffer *leaf;
5365         struct btrfs_key key;
5366         u64 last;
5367         int ret = 0;
5368
5369         path = btrfs_alloc_path();
5370         if (!path)
5371                 return -ENOMEM;
5372
5373         root = root->fs_info->extent_root;
5374
5375         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5376
5377         key.objectid = last;
5378         key.offset = 0;
5379         key.type = BTRFS_EXTENT_ITEM_KEY;
5380
5381         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5382         if (ret < 0)
5383                 goto out;
5384         ret = 0;
5385         while (1) {
5386                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5387                         ret = btrfs_next_leaf(root, path);
5388                         if (ret < 0)
5389                                 goto out;
5390                         if (ret > 0) {
5391                                 ret = 0;
5392                                 break;
5393                         }
5394                 }
5395                 leaf = path->nodes[0];
5396                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5397                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5398                         break;
5399                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5400                     key.type != BTRFS_METADATA_ITEM_KEY) {
5401                         path->slots[0]++;
5402                         continue;
5403                 }
5404
5405                 if (last == key.objectid) {
5406                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5407                                 last = key.objectid + key.offset;
5408                         else
5409                                 last = key.objectid + root->leafsize;
5410                         path->slots[0]++;
5411                         continue;
5412                 }
5413
5414                 ret = check_cache_range(root, cache, last,
5415                                         key.objectid - last);
5416                 if (ret)
5417                         break;
5418                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5419                         last = key.objectid + key.offset;
5420                 else
5421                         last = key.objectid + root->leafsize;
5422                 path->slots[0]++;
5423         }
5424
5425         if (last < cache->key.objectid + cache->key.offset)
5426                 ret = check_cache_range(root, cache, last,
5427                                         cache->key.objectid +
5428                                         cache->key.offset - last);
5429
5430 out:
5431         btrfs_free_path(path);
5432
5433         if (!ret &&
5434             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5435                 fprintf(stderr, "There are still entries left in the space "
5436                         "cache\n");
5437                 ret = -EINVAL;
5438         }
5439
5440         return ret;
5441 }
5442
5443 static int check_space_cache(struct btrfs_root *root)
5444 {
5445         struct btrfs_block_group_cache *cache;
5446         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5447         int ret;
5448         int error = 0;
5449
5450         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5451             btrfs_super_generation(root->fs_info->super_copy) !=
5452             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5453                 printf("cache and super generation don't match, space cache "
5454                        "will be invalidated\n");
5455                 return 0;
5456         }
5457
5458         if (ctx.progress_enabled) {
5459                 ctx.tp = TASK_FREE_SPACE;
5460                 task_start(ctx.info);
5461         }
5462
5463         while (1) {
5464                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5465                 if (!cache)
5466                         break;
5467
5468                 start = cache->key.objectid + cache->key.offset;
5469                 if (!cache->free_space_ctl) {
5470                         if (btrfs_init_free_space_ctl(cache,
5471                                                       root->sectorsize)) {
5472                                 ret = -ENOMEM;
5473                                 break;
5474                         }
5475                 } else {
5476                         btrfs_remove_free_space_cache(cache);
5477                 }
5478
5479                 ret = load_free_space_cache(root->fs_info, cache);
5480                 if (!ret)
5481                         continue;
5482
5483                 ret = verify_space_cache(root, cache);
5484                 if (ret) {
5485                         fprintf(stderr, "cache appears valid but isnt %Lu\n",
5486                                 cache->key.objectid);
5487                         error++;
5488                 }
5489         }
5490
5491         task_stop(ctx.info);
5492
5493         return error ? -EINVAL : 0;
5494 }
5495
5496 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5497                         u64 num_bytes, unsigned long leaf_offset,
5498                         struct extent_buffer *eb) {
5499
5500         u64 offset = 0;
5501         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5502         char *data;
5503         unsigned long csum_offset;
5504         u32 csum;
5505         u32 csum_expected;
5506         u64 read_len;
5507         u64 data_checked = 0;
5508         u64 tmp;
5509         int ret = 0;
5510         int mirror;
5511         int num_copies;
5512
5513         if (num_bytes % root->sectorsize)
5514                 return -EINVAL;
5515
5516         data = malloc(num_bytes);
5517         if (!data)
5518                 return -ENOMEM;
5519
5520         while (offset < num_bytes) {
5521                 mirror = 0;
5522 again:
5523                 read_len = num_bytes - offset;
5524                 /* read as much space once a time */
5525                 ret = read_extent_data(root, data + offset,
5526                                 bytenr + offset, &read_len, mirror);
5527                 if (ret)
5528                         goto out;
5529                 data_checked = 0;
5530                 /* verify every 4k data's checksum */
5531                 while (data_checked < read_len) {
5532                         csum = ~(u32)0;
5533                         tmp = offset + data_checked;
5534
5535                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5536                                                csum, root->sectorsize);
5537                         btrfs_csum_final(csum, (char *)&csum);
5538
5539                         csum_offset = leaf_offset +
5540                                  tmp / root->sectorsize * csum_size;
5541                         read_extent_buffer(eb, (char *)&csum_expected,
5542                                            csum_offset, csum_size);
5543                         /* try another mirror */
5544                         if (csum != csum_expected) {
5545                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5546                                                 mirror, bytenr + tmp,
5547                                                 csum, csum_expected);
5548                                 num_copies = btrfs_num_copies(
5549                                                 &root->fs_info->mapping_tree,
5550                                                 bytenr, num_bytes);
5551                                 if (mirror < num_copies - 1) {
5552                                         mirror += 1;
5553                                         goto again;
5554                                 }
5555                         }
5556                         data_checked += root->sectorsize;
5557                 }
5558                 offset += read_len;
5559         }
5560 out:
5561         free(data);
5562         return ret;
5563 }
5564
5565 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5566                                u64 num_bytes)
5567 {
5568         struct btrfs_path *path;
5569         struct extent_buffer *leaf;
5570         struct btrfs_key key;
5571         int ret;
5572
5573         path = btrfs_alloc_path();
5574         if (!path) {
5575                 fprintf(stderr, "Error allocing path\n");
5576                 return -ENOMEM;
5577         }
5578
5579         key.objectid = bytenr;
5580         key.type = BTRFS_EXTENT_ITEM_KEY;
5581         key.offset = (u64)-1;
5582
5583 again:
5584         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5585                                 0, 0);
5586         if (ret < 0) {
5587                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5588                 btrfs_free_path(path);
5589                 return ret;
5590         } else if (ret) {
5591                 if (path->slots[0] > 0) {
5592                         path->slots[0]--;
5593                 } else {
5594                         ret = btrfs_prev_leaf(root, path);
5595                         if (ret < 0) {
5596                                 goto out;
5597                         } else if (ret > 0) {
5598                                 ret = 0;
5599                                 goto out;
5600                         }
5601                 }
5602         }
5603
5604         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5605
5606         /*
5607          * Block group items come before extent items if they have the same
5608          * bytenr, so walk back one more just in case.  Dear future traveler,
5609          * first congrats on mastering time travel.  Now if it's not too much
5610          * trouble could you go back to 2006 and tell Chris to make the
5611          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5612          * EXTENT_ITEM_KEY please?
5613          */
5614         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5615                 if (path->slots[0] > 0) {
5616                         path->slots[0]--;
5617                 } else {
5618                         ret = btrfs_prev_leaf(root, path);
5619                         if (ret < 0) {
5620                                 goto out;
5621                         } else if (ret > 0) {
5622                                 ret = 0;
5623                                 goto out;
5624                         }
5625                 }
5626                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5627         }
5628
5629         while (num_bytes) {
5630                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5631                         ret = btrfs_next_leaf(root, path);
5632                         if (ret < 0) {
5633                                 fprintf(stderr, "Error going to next leaf "
5634                                         "%d\n", ret);
5635                                 btrfs_free_path(path);
5636                                 return ret;
5637                         } else if (ret) {
5638                                 break;
5639                         }
5640                 }
5641                 leaf = path->nodes[0];
5642                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5643                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5644                         path->slots[0]++;
5645                         continue;
5646                 }
5647                 if (key.objectid + key.offset < bytenr) {
5648                         path->slots[0]++;
5649                         continue;
5650                 }
5651                 if (key.objectid > bytenr + num_bytes)
5652                         break;
5653
5654                 if (key.objectid == bytenr) {
5655                         if (key.offset >= num_bytes) {
5656                                 num_bytes = 0;
5657                                 break;
5658                         }
5659                         num_bytes -= key.offset;
5660                         bytenr += key.offset;
5661                 } else if (key.objectid < bytenr) {
5662                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5663                                 num_bytes = 0;
5664                                 break;
5665                         }
5666                         num_bytes = (bytenr + num_bytes) -
5667                                 (key.objectid + key.offset);
5668                         bytenr = key.objectid + key.offset;
5669                 } else {
5670                         if (key.objectid + key.offset < bytenr + num_bytes) {
5671                                 u64 new_start = key.objectid + key.offset;
5672                                 u64 new_bytes = bytenr + num_bytes - new_start;
5673
5674                                 /*
5675                                  * Weird case, the extent is in the middle of
5676                                  * our range, we'll have to search one side
5677                                  * and then the other.  Not sure if this happens
5678                                  * in real life, but no harm in coding it up
5679                                  * anyway just in case.
5680                                  */
5681                                 btrfs_release_path(path);
5682                                 ret = check_extent_exists(root, new_start,
5683                                                           new_bytes);
5684                                 if (ret) {
5685                                         fprintf(stderr, "Right section didn't "
5686                                                 "have a record\n");
5687                                         break;
5688                                 }
5689                                 num_bytes = key.objectid - bytenr;
5690                                 goto again;
5691                         }
5692                         num_bytes = key.objectid - bytenr;
5693                 }
5694                 path->slots[0]++;
5695         }
5696         ret = 0;
5697
5698 out:
5699         if (num_bytes && !ret) {
5700                 fprintf(stderr, "There are no extents for csum range "
5701                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5702                 ret = 1;
5703         }
5704
5705         btrfs_free_path(path);
5706         return ret;
5707 }
5708
5709 static int check_csums(struct btrfs_root *root)
5710 {
5711         struct btrfs_path *path;
5712         struct extent_buffer *leaf;
5713         struct btrfs_key key;
5714         u64 offset = 0, num_bytes = 0;
5715         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5716         int errors = 0;
5717         int ret;
5718         u64 data_len;
5719         unsigned long leaf_offset;
5720
5721         root = root->fs_info->csum_root;
5722         if (!extent_buffer_uptodate(root->node)) {
5723                 fprintf(stderr, "No valid csum tree found\n");
5724                 return -ENOENT;
5725         }
5726
5727         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5728         key.type = BTRFS_EXTENT_CSUM_KEY;
5729         key.offset = 0;
5730
5731         path = btrfs_alloc_path();
5732         if (!path)
5733                 return -ENOMEM;
5734
5735         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5736         if (ret < 0) {
5737                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5738                 btrfs_free_path(path);
5739                 return ret;
5740         }
5741
5742         if (ret > 0 && path->slots[0])
5743                 path->slots[0]--;
5744         ret = 0;
5745
5746         while (1) {
5747                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5748                         ret = btrfs_next_leaf(root, path);
5749                         if (ret < 0) {
5750                                 fprintf(stderr, "Error going to next leaf "
5751                                         "%d\n", ret);
5752                                 break;
5753                         }
5754                         if (ret)
5755                                 break;
5756                 }
5757                 leaf = path->nodes[0];
5758
5759                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5760                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5761                         path->slots[0]++;
5762                         continue;
5763                 }
5764
5765                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5766                               csum_size) * root->sectorsize;
5767                 if (!check_data_csum)
5768                         goto skip_csum_check;
5769                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5770                 ret = check_extent_csums(root, key.offset, data_len,
5771                                          leaf_offset, leaf);
5772                 if (ret)
5773                         break;
5774 skip_csum_check:
5775                 if (!num_bytes) {
5776                         offset = key.offset;
5777                 } else if (key.offset != offset + num_bytes) {
5778                         ret = check_extent_exists(root, offset, num_bytes);
5779                         if (ret) {
5780                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5781                                         "there is no extent record\n",
5782                                         offset, offset+num_bytes);
5783                                 errors++;
5784                         }
5785                         offset = key.offset;
5786                         num_bytes = 0;
5787                 }
5788                 num_bytes += data_len;
5789                 path->slots[0]++;
5790         }
5791
5792         btrfs_free_path(path);
5793         return errors;
5794 }
5795
5796 static int is_dropped_key(struct btrfs_key *key,
5797                           struct btrfs_key *drop_key) {
5798         if (key->objectid < drop_key->objectid)
5799                 return 1;
5800         else if (key->objectid == drop_key->objectid) {
5801                 if (key->type < drop_key->type)
5802                         return 1;
5803                 else if (key->type == drop_key->type) {
5804                         if (key->offset < drop_key->offset)
5805                                 return 1;
5806                 }
5807         }
5808         return 0;
5809 }
5810
5811 /*
5812  * Here are the rules for FULL_BACKREF.
5813  *
5814  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5815  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5816  *      FULL_BACKREF set.
5817  * 3) We cow'ed the block walking down a reloc tree.  This is impossible to tell
5818  *    if it happened after the relocation occurred since we'll have dropped the
5819  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5820  *    have no real way to know for sure.
5821  *
5822  * We process the blocks one root at a time, and we start from the lowest root
5823  * objectid and go to the highest.  So we can just lookup the owner backref for
5824  * the record and if we don't find it then we know it doesn't exist and we have
5825  * a FULL BACKREF.
5826  *
5827  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5828  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5829  * be set or not and then we can check later once we've gathered all the refs.
5830  */
5831 static int calc_extent_flag(struct btrfs_root *root,
5832                            struct cache_tree *extent_cache,
5833                            struct extent_buffer *buf,
5834                            struct root_item_record *ri,
5835                            u64 *flags)
5836 {
5837         struct extent_record *rec;
5838         struct cache_extent *cache;
5839         struct tree_backref *tback;
5840         u64 owner = 0;
5841
5842         cache = lookup_cache_extent(extent_cache, buf->start, 1);
5843         /* we have added this extent before */
5844         BUG_ON(!cache);
5845         rec = container_of(cache, struct extent_record, cache);
5846
5847         /*
5848          * Except file/reloc tree, we can not have
5849          * FULL BACKREF MODE
5850          */
5851         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5852                 goto normal;
5853         /*
5854          * root node
5855          */
5856         if (buf->start == ri->bytenr)
5857                 goto normal;
5858
5859         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5860                 goto full_backref;
5861
5862         owner = btrfs_header_owner(buf);
5863         if (owner == ri->objectid)
5864                 goto normal;
5865
5866         tback = find_tree_backref(rec, 0, owner);
5867         if (!tback)
5868                 goto full_backref;
5869 normal:
5870         *flags = 0;
5871         if (rec->flag_block_full_backref != -1 &&
5872             rec->flag_block_full_backref != 0)
5873                 rec->bad_full_backref = 1;
5874         return 0;
5875 full_backref:
5876         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5877         if (rec->flag_block_full_backref != -1 &&
5878             rec->flag_block_full_backref != 1)
5879                 rec->bad_full_backref = 1;
5880         return 0;
5881 }
5882
5883 static int run_next_block(struct btrfs_root *root,
5884                           struct block_info *bits,
5885                           int bits_nr,
5886                           u64 *last,
5887                           struct cache_tree *pending,
5888                           struct cache_tree *seen,
5889                           struct cache_tree *reada,
5890                           struct cache_tree *nodes,
5891                           struct cache_tree *extent_cache,
5892                           struct cache_tree *chunk_cache,
5893                           struct rb_root *dev_cache,
5894                           struct block_group_tree *block_group_cache,
5895                           struct device_extent_tree *dev_extent_cache,
5896                           struct root_item_record *ri)
5897 {
5898         struct extent_buffer *buf;
5899         struct extent_record *rec = NULL;
5900         u64 bytenr;
5901         u32 size;
5902         u64 parent;
5903         u64 owner;
5904         u64 flags;
5905         u64 ptr;
5906         u64 gen = 0;
5907         int ret = 0;
5908         int i;
5909         int nritems;
5910         struct btrfs_key key;
5911         struct cache_extent *cache;
5912         int reada_bits;
5913
5914         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5915                                     bits_nr, &reada_bits);
5916         if (nritems == 0)
5917                 return 1;
5918
5919         if (!reada_bits) {
5920                 for(i = 0; i < nritems; i++) {
5921                         ret = add_cache_extent(reada, bits[i].start,
5922                                                bits[i].size);
5923                         if (ret == -EEXIST)
5924                                 continue;
5925
5926                         /* fixme, get the parent transid */
5927                         readahead_tree_block(root, bits[i].start,
5928                                              bits[i].size, 0);
5929                 }
5930         }
5931         *last = bits[0].start;
5932         bytenr = bits[0].start;
5933         size = bits[0].size;
5934
5935         cache = lookup_cache_extent(pending, bytenr, size);
5936         if (cache) {
5937                 remove_cache_extent(pending, cache);
5938                 free(cache);
5939         }
5940         cache = lookup_cache_extent(reada, bytenr, size);
5941         if (cache) {
5942                 remove_cache_extent(reada, cache);
5943                 free(cache);
5944         }
5945         cache = lookup_cache_extent(nodes, bytenr, size);
5946         if (cache) {
5947                 remove_cache_extent(nodes, cache);
5948                 free(cache);
5949         }
5950         cache = lookup_cache_extent(extent_cache, bytenr, size);
5951         if (cache) {
5952                 rec = container_of(cache, struct extent_record, cache);
5953                 gen = rec->parent_generation;
5954         }
5955
5956         /* fixme, get the real parent transid */
5957         buf = read_tree_block(root, bytenr, size, gen);
5958         if (!extent_buffer_uptodate(buf)) {
5959                 record_bad_block_io(root->fs_info,
5960                                     extent_cache, bytenr, size);
5961                 goto out;
5962         }
5963
5964         nritems = btrfs_header_nritems(buf);
5965
5966         flags = 0;
5967         if (!init_extent_tree) {
5968                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5969                                        btrfs_header_level(buf), 1, NULL,
5970                                        &flags);
5971                 if (ret < 0) {
5972                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5973                         if (ret < 0) {
5974                                 fprintf(stderr, "Couldn't calc extent flags\n");
5975                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5976                         }
5977                 }
5978         } else {
5979                 flags = 0;
5980                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5981                 if (ret < 0) {
5982                         fprintf(stderr, "Couldn't calc extent flags\n");
5983                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5984                 }
5985         }
5986
5987         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5988                 if (ri != NULL &&
5989                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5990                     ri->objectid == btrfs_header_owner(buf)) {
5991                         /*
5992                          * Ok we got to this block from it's original owner and
5993                          * we have FULL_BACKREF set.  Relocation can leave
5994                          * converted blocks over so this is altogether possible,
5995                          * however it's not possible if the generation > the
5996                          * last snapshot, so check for this case.
5997                          */
5998                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5999                             btrfs_header_generation(buf) > ri->last_snapshot) {
6000                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6001                                 rec->bad_full_backref = 1;
6002                         }
6003                 }
6004         } else {
6005                 if (ri != NULL &&
6006                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6007                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6008                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6009                         rec->bad_full_backref = 1;
6010                 }
6011         }
6012
6013         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6014                 rec->flag_block_full_backref = 1;
6015                 parent = bytenr;
6016                 owner = 0;
6017         } else {
6018                 rec->flag_block_full_backref = 0;
6019                 parent = 0;
6020                 owner = btrfs_header_owner(buf);
6021         }
6022
6023         ret = check_block(root, extent_cache, buf, flags);
6024         if (ret)
6025                 goto out;
6026
6027         if (btrfs_is_leaf(buf)) {
6028                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6029                 for (i = 0; i < nritems; i++) {
6030                         struct btrfs_file_extent_item *fi;
6031                         btrfs_item_key_to_cpu(buf, &key, i);
6032                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6033                                 process_extent_item(root, extent_cache, buf,
6034                                                     i);
6035                                 continue;
6036                         }
6037                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6038                                 process_extent_item(root, extent_cache, buf,
6039                                                     i);
6040                                 continue;
6041                         }
6042                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6043                                 total_csum_bytes +=
6044                                         btrfs_item_size_nr(buf, i);
6045                                 continue;
6046                         }
6047                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6048                                 process_chunk_item(chunk_cache, &key, buf, i);
6049                                 continue;
6050                         }
6051                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6052                                 process_device_item(dev_cache, &key, buf, i);
6053                                 continue;
6054                         }
6055                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6056                                 process_block_group_item(block_group_cache,
6057                                         &key, buf, i);
6058                                 continue;
6059                         }
6060                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6061                                 process_device_extent_item(dev_extent_cache,
6062                                         &key, buf, i);
6063                                 continue;
6064
6065                         }
6066                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6067 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6068                                 process_extent_ref_v0(extent_cache, buf, i);
6069 #else
6070                                 BUG();
6071 #endif
6072                                 continue;
6073                         }
6074
6075                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6076                                 add_tree_backref(extent_cache, key.objectid, 0,
6077                                                  key.offset, 0);
6078                                 continue;
6079                         }
6080                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6081                                 add_tree_backref(extent_cache, key.objectid,
6082                                                  key.offset, 0, 0);
6083                                 continue;
6084                         }
6085                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6086                                 struct btrfs_extent_data_ref *ref;
6087                                 ref = btrfs_item_ptr(buf, i,
6088                                                 struct btrfs_extent_data_ref);
6089                                 add_data_backref(extent_cache,
6090                                         key.objectid, 0,
6091                                         btrfs_extent_data_ref_root(buf, ref),
6092                                         btrfs_extent_data_ref_objectid(buf,
6093                                                                        ref),
6094                                         btrfs_extent_data_ref_offset(buf, ref),
6095                                         btrfs_extent_data_ref_count(buf, ref),
6096                                         0, root->sectorsize);
6097                                 continue;
6098                         }
6099                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6100                                 struct btrfs_shared_data_ref *ref;
6101                                 ref = btrfs_item_ptr(buf, i,
6102                                                 struct btrfs_shared_data_ref);
6103                                 add_data_backref(extent_cache,
6104                                         key.objectid, key.offset, 0, 0, 0,
6105                                         btrfs_shared_data_ref_count(buf, ref),
6106                                         0, root->sectorsize);
6107                                 continue;
6108                         }
6109                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6110                                 struct bad_item *bad;
6111
6112                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6113                                         continue;
6114                                 if (!owner)
6115                                         continue;
6116                                 bad = malloc(sizeof(struct bad_item));
6117                                 if (!bad)
6118                                         continue;
6119                                 INIT_LIST_HEAD(&bad->list);
6120                                 memcpy(&bad->key, &key,
6121                                        sizeof(struct btrfs_key));
6122                                 bad->root_id = owner;
6123                                 list_add_tail(&bad->list, &delete_items);
6124                                 continue;
6125                         }
6126                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6127                                 continue;
6128                         fi = btrfs_item_ptr(buf, i,
6129                                             struct btrfs_file_extent_item);
6130                         if (btrfs_file_extent_type(buf, fi) ==
6131                             BTRFS_FILE_EXTENT_INLINE)
6132                                 continue;
6133                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6134                                 continue;
6135
6136                         data_bytes_allocated +=
6137                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6138                         if (data_bytes_allocated < root->sectorsize) {
6139                                 abort();
6140                         }
6141                         data_bytes_referenced +=
6142                                 btrfs_file_extent_num_bytes(buf, fi);
6143                         add_data_backref(extent_cache,
6144                                 btrfs_file_extent_disk_bytenr(buf, fi),
6145                                 parent, owner, key.objectid, key.offset -
6146                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6147                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6148                 }
6149         } else {
6150                 int level;
6151                 struct btrfs_key first_key;
6152
6153                 first_key.objectid = 0;
6154
6155                 if (nritems > 0)
6156                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6157                 level = btrfs_header_level(buf);
6158                 for (i = 0; i < nritems; i++) {
6159                         ptr = btrfs_node_blockptr(buf, i);
6160                         size = btrfs_level_size(root, level - 1);
6161                         btrfs_node_key_to_cpu(buf, &key, i);
6162                         if (ri != NULL) {
6163                                 if ((level == ri->drop_level)
6164                                     && is_dropped_key(&key, &ri->drop_key)) {
6165                                         continue;
6166                                 }
6167                         }
6168                         ret = add_extent_rec(extent_cache, &key,
6169                                              btrfs_node_ptr_generation(buf, i),
6170                                              ptr, size, 0, 0, 1, 0, 1, 0,
6171                                              size);
6172                         BUG_ON(ret);
6173
6174                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6175
6176                         if (level > 1) {
6177                                 add_pending(nodes, seen, ptr, size);
6178                         } else {
6179                                 add_pending(pending, seen, ptr, size);
6180                         }
6181                 }
6182                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6183                                       nritems) * sizeof(struct btrfs_key_ptr);
6184         }
6185         total_btree_bytes += buf->len;
6186         if (fs_root_objectid(btrfs_header_owner(buf)))
6187                 total_fs_tree_bytes += buf->len;
6188         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6189                 total_extent_tree_bytes += buf->len;
6190         if (!found_old_backref &&
6191             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6192             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6193             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6194                 found_old_backref = 1;
6195 out:
6196         free_extent_buffer(buf);
6197         return ret;
6198 }
6199
6200 static int add_root_to_pending(struct extent_buffer *buf,
6201                                struct cache_tree *extent_cache,
6202                                struct cache_tree *pending,
6203                                struct cache_tree *seen,
6204                                struct cache_tree *nodes,
6205                                u64 objectid)
6206 {
6207         if (btrfs_header_level(buf) > 0)
6208                 add_pending(nodes, seen, buf->start, buf->len);
6209         else
6210                 add_pending(pending, seen, buf->start, buf->len);
6211         add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6212                        0, 1, 1, 0, 1, 0, buf->len);
6213
6214         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6215             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6216                 add_tree_backref(extent_cache, buf->start, buf->start,
6217                                  0, 1);
6218         else
6219                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6220         return 0;
6221 }
6222
6223 /* as we fix the tree, we might be deleting blocks that
6224  * we're tracking for repair.  This hook makes sure we
6225  * remove any backrefs for blocks as we are fixing them.
6226  */
6227 static int free_extent_hook(struct btrfs_trans_handle *trans,
6228                             struct btrfs_root *root,
6229                             u64 bytenr, u64 num_bytes, u64 parent,
6230                             u64 root_objectid, u64 owner, u64 offset,
6231                             int refs_to_drop)
6232 {
6233         struct extent_record *rec;
6234         struct cache_extent *cache;
6235         int is_data;
6236         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6237
6238         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6239         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6240         if (!cache)
6241                 return 0;
6242
6243         rec = container_of(cache, struct extent_record, cache);
6244         if (is_data) {
6245                 struct data_backref *back;
6246                 back = find_data_backref(rec, parent, root_objectid, owner,
6247                                          offset, 1, bytenr, num_bytes);
6248                 if (!back)
6249                         goto out;
6250                 if (back->node.found_ref) {
6251                         back->found_ref -= refs_to_drop;
6252                         if (rec->refs)
6253                                 rec->refs -= refs_to_drop;
6254                 }
6255                 if (back->node.found_extent_tree) {
6256                         back->num_refs -= refs_to_drop;
6257                         if (rec->extent_item_refs)
6258                                 rec->extent_item_refs -= refs_to_drop;
6259                 }
6260                 if (back->found_ref == 0)
6261                         back->node.found_ref = 0;
6262                 if (back->num_refs == 0)
6263                         back->node.found_extent_tree = 0;
6264
6265                 if (!back->node.found_extent_tree && back->node.found_ref) {
6266                         list_del(&back->node.list);
6267                         free(back);
6268                 }
6269         } else {
6270                 struct tree_backref *back;
6271                 back = find_tree_backref(rec, parent, root_objectid);
6272                 if (!back)
6273                         goto out;
6274                 if (back->node.found_ref) {
6275                         if (rec->refs)
6276                                 rec->refs--;
6277                         back->node.found_ref = 0;
6278                 }
6279                 if (back->node.found_extent_tree) {
6280                         if (rec->extent_item_refs)
6281                                 rec->extent_item_refs--;
6282                         back->node.found_extent_tree = 0;
6283                 }
6284                 if (!back->node.found_extent_tree && back->node.found_ref) {
6285                         list_del(&back->node.list);
6286                         free(back);
6287                 }
6288         }
6289         maybe_free_extent_rec(extent_cache, rec);
6290 out:
6291         return 0;
6292 }
6293
6294 static int delete_extent_records(struct btrfs_trans_handle *trans,
6295                                  struct btrfs_root *root,
6296                                  struct btrfs_path *path,
6297                                  u64 bytenr, u64 new_len)
6298 {
6299         struct btrfs_key key;
6300         struct btrfs_key found_key;
6301         struct extent_buffer *leaf;
6302         int ret;
6303         int slot;
6304
6305
6306         key.objectid = bytenr;
6307         key.type = (u8)-1;
6308         key.offset = (u64)-1;
6309
6310         while(1) {
6311                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6312                                         &key, path, 0, 1);
6313                 if (ret < 0)
6314                         break;
6315
6316                 if (ret > 0) {
6317                         ret = 0;
6318                         if (path->slots[0] == 0)
6319                                 break;
6320                         path->slots[0]--;
6321                 }
6322                 ret = 0;
6323
6324                 leaf = path->nodes[0];
6325                 slot = path->slots[0];
6326
6327                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6328                 if (found_key.objectid != bytenr)
6329                         break;
6330
6331                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6332                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6333                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6334                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6335                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6336                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6337                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6338                         btrfs_release_path(path);
6339                         if (found_key.type == 0) {
6340                                 if (found_key.offset == 0)
6341                                         break;
6342                                 key.offset = found_key.offset - 1;
6343                                 key.type = found_key.type;
6344                         }
6345                         key.type = found_key.type - 1;
6346                         key.offset = (u64)-1;
6347                         continue;
6348                 }
6349
6350                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6351                         found_key.objectid, found_key.type, found_key.offset);
6352
6353                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6354                 if (ret)
6355                         break;
6356                 btrfs_release_path(path);
6357
6358                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6359                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6360                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6361                                 found_key.offset : root->leafsize;
6362
6363                         ret = btrfs_update_block_group(trans, root, bytenr,
6364                                                        bytes, 0, 0);
6365                         if (ret)
6366                                 break;
6367                 }
6368         }
6369
6370         btrfs_release_path(path);
6371         return ret;
6372 }
6373
6374 /*
6375  * for a single backref, this will allocate a new extent
6376  * and add the backref to it.
6377  */
6378 static int record_extent(struct btrfs_trans_handle *trans,
6379                          struct btrfs_fs_info *info,
6380                          struct btrfs_path *path,
6381                          struct extent_record *rec,
6382                          struct extent_backref *back,
6383                          int allocated, u64 flags)
6384 {
6385         int ret;
6386         struct btrfs_root *extent_root = info->extent_root;
6387         struct extent_buffer *leaf;
6388         struct btrfs_key ins_key;
6389         struct btrfs_extent_item *ei;
6390         struct tree_backref *tback;
6391         struct data_backref *dback;
6392         struct btrfs_tree_block_info *bi;
6393
6394         if (!back->is_data)
6395                 rec->max_size = max_t(u64, rec->max_size,
6396                                     info->extent_root->leafsize);
6397
6398         if (!allocated) {
6399                 u32 item_size = sizeof(*ei);
6400
6401                 if (!back->is_data)
6402                         item_size += sizeof(*bi);
6403
6404                 ins_key.objectid = rec->start;
6405                 ins_key.offset = rec->max_size;
6406                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6407
6408                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6409                                         &ins_key, item_size);
6410                 if (ret)
6411                         goto fail;
6412
6413                 leaf = path->nodes[0];
6414                 ei = btrfs_item_ptr(leaf, path->slots[0],
6415                                     struct btrfs_extent_item);
6416
6417                 btrfs_set_extent_refs(leaf, ei, 0);
6418                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6419
6420                 if (back->is_data) {
6421                         btrfs_set_extent_flags(leaf, ei,
6422                                                BTRFS_EXTENT_FLAG_DATA);
6423                 } else {
6424                         struct btrfs_disk_key copy_key;;
6425
6426                         tback = (struct tree_backref *)back;
6427                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6428                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6429                                              sizeof(*bi));
6430
6431                         btrfs_set_disk_key_objectid(&copy_key,
6432                                                     rec->info_objectid);
6433                         btrfs_set_disk_key_type(&copy_key, 0);
6434                         btrfs_set_disk_key_offset(&copy_key, 0);
6435
6436                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6437                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6438
6439                         btrfs_set_extent_flags(leaf, ei,
6440                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6441                 }
6442
6443                 btrfs_mark_buffer_dirty(leaf);
6444                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6445                                                rec->max_size, 1, 0);
6446                 if (ret)
6447                         goto fail;
6448                 btrfs_release_path(path);
6449         }
6450
6451         if (back->is_data) {
6452                 u64 parent;
6453                 int i;
6454
6455                 dback = (struct data_backref *)back;
6456                 if (back->full_backref)
6457                         parent = dback->parent;
6458                 else
6459                         parent = 0;
6460
6461                 for (i = 0; i < dback->found_ref; i++) {
6462                         /* if parent != 0, we're doing a full backref
6463                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6464                          * just makes the backref allocator create a data
6465                          * backref
6466                          */
6467                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6468                                                    rec->start, rec->max_size,
6469                                                    parent,
6470                                                    dback->root,
6471                                                    parent ?
6472                                                    BTRFS_FIRST_FREE_OBJECTID :
6473                                                    dback->owner,
6474                                                    dback->offset);
6475                         if (ret)
6476                                 break;
6477                 }
6478                 fprintf(stderr, "adding new data backref"
6479                                 " on %llu %s %llu owner %llu"
6480                                 " offset %llu found %d\n",
6481                                 (unsigned long long)rec->start,
6482                                 back->full_backref ?
6483                                 "parent" : "root",
6484                                 back->full_backref ?
6485                                 (unsigned long long)parent :
6486                                 (unsigned long long)dback->root,
6487                                 (unsigned long long)dback->owner,
6488                                 (unsigned long long)dback->offset,
6489                                 dback->found_ref);
6490         } else {
6491                 u64 parent;
6492
6493                 tback = (struct tree_backref *)back;
6494                 if (back->full_backref)
6495                         parent = tback->parent;
6496                 else
6497                         parent = 0;
6498
6499                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6500                                            rec->start, rec->max_size,
6501                                            parent, tback->root, 0, 0);
6502                 fprintf(stderr, "adding new tree backref on "
6503                         "start %llu len %llu parent %llu root %llu\n",
6504                         rec->start, rec->max_size, parent, tback->root);
6505         }
6506 fail:
6507         btrfs_release_path(path);
6508         return ret;
6509 }
6510
6511 struct extent_entry {
6512         u64 bytenr;
6513         u64 bytes;
6514         int count;
6515         int broken;
6516         struct list_head list;
6517 };
6518
6519 static struct extent_entry *find_entry(struct list_head *entries,
6520                                        u64 bytenr, u64 bytes)
6521 {
6522         struct extent_entry *entry = NULL;
6523
6524         list_for_each_entry(entry, entries, list) {
6525                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6526                         return entry;
6527         }
6528
6529         return NULL;
6530 }
6531
6532 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6533 {
6534         struct extent_entry *entry, *best = NULL, *prev = NULL;
6535
6536         list_for_each_entry(entry, entries, list) {
6537                 if (!prev) {
6538                         prev = entry;
6539                         continue;
6540                 }
6541
6542                 /*
6543                  * If there are as many broken entries as entries then we know
6544                  * not to trust this particular entry.
6545                  */
6546                 if (entry->broken == entry->count)
6547                         continue;
6548
6549                 /*
6550                  * If our current entry == best then we can't be sure our best
6551                  * is really the best, so we need to keep searching.
6552                  */
6553                 if (best && best->count == entry->count) {
6554                         prev = entry;
6555                         best = NULL;
6556                         continue;
6557                 }
6558
6559                 /* Prev == entry, not good enough, have to keep searching */
6560                 if (!prev->broken && prev->count == entry->count)
6561                         continue;
6562
6563                 if (!best)
6564                         best = (prev->count > entry->count) ? prev : entry;
6565                 else if (best->count < entry->count)
6566                         best = entry;
6567                 prev = entry;
6568         }
6569
6570         return best;
6571 }
6572
6573 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6574                       struct data_backref *dback, struct extent_entry *entry)
6575 {
6576         struct btrfs_trans_handle *trans;
6577         struct btrfs_root *root;
6578         struct btrfs_file_extent_item *fi;
6579         struct extent_buffer *leaf;
6580         struct btrfs_key key;
6581         u64 bytenr, bytes;
6582         int ret, err;
6583
6584         key.objectid = dback->root;
6585         key.type = BTRFS_ROOT_ITEM_KEY;
6586         key.offset = (u64)-1;
6587         root = btrfs_read_fs_root(info, &key);
6588         if (IS_ERR(root)) {
6589                 fprintf(stderr, "Couldn't find root for our ref\n");
6590                 return -EINVAL;
6591         }
6592
6593         /*
6594          * The backref points to the original offset of the extent if it was
6595          * split, so we need to search down to the offset we have and then walk
6596          * forward until we find the backref we're looking for.
6597          */
6598         key.objectid = dback->owner;
6599         key.type = BTRFS_EXTENT_DATA_KEY;
6600         key.offset = dback->offset;
6601         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6602         if (ret < 0) {
6603                 fprintf(stderr, "Error looking up ref %d\n", ret);
6604                 return ret;
6605         }
6606
6607         while (1) {
6608                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6609                         ret = btrfs_next_leaf(root, path);
6610                         if (ret) {
6611                                 fprintf(stderr, "Couldn't find our ref, next\n");
6612                                 return -EINVAL;
6613                         }
6614                 }
6615                 leaf = path->nodes[0];
6616                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6617                 if (key.objectid != dback->owner ||
6618                     key.type != BTRFS_EXTENT_DATA_KEY) {
6619                         fprintf(stderr, "Couldn't find our ref, search\n");
6620                         return -EINVAL;
6621                 }
6622                 fi = btrfs_item_ptr(leaf, path->slots[0],
6623                                     struct btrfs_file_extent_item);
6624                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6625                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6626
6627                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6628                         break;
6629                 path->slots[0]++;
6630         }
6631
6632         btrfs_release_path(path);
6633
6634         trans = btrfs_start_transaction(root, 1);
6635         if (IS_ERR(trans))
6636                 return PTR_ERR(trans);
6637
6638         /*
6639          * Ok we have the key of the file extent we want to fix, now we can cow
6640          * down to the thing and fix it.
6641          */
6642         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6643         if (ret < 0) {
6644                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6645                         key.objectid, key.type, key.offset, ret);
6646                 goto out;
6647         }
6648         if (ret > 0) {
6649                 fprintf(stderr, "Well that's odd, we just found this key "
6650                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6651                         key.offset);
6652                 ret = -EINVAL;
6653                 goto out;
6654         }
6655         leaf = path->nodes[0];
6656         fi = btrfs_item_ptr(leaf, path->slots[0],
6657                             struct btrfs_file_extent_item);
6658
6659         if (btrfs_file_extent_compression(leaf, fi) &&
6660             dback->disk_bytenr != entry->bytenr) {
6661                 fprintf(stderr, "Ref doesn't match the record start and is "
6662                         "compressed, please take a btrfs-image of this file "
6663                         "system and send it to a btrfs developer so they can "
6664                         "complete this functionality for bytenr %Lu\n",
6665                         dback->disk_bytenr);
6666                 ret = -EINVAL;
6667                 goto out;
6668         }
6669
6670         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6671                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6672         } else if (dback->disk_bytenr > entry->bytenr) {
6673                 u64 off_diff, offset;
6674
6675                 off_diff = dback->disk_bytenr - entry->bytenr;
6676                 offset = btrfs_file_extent_offset(leaf, fi);
6677                 if (dback->disk_bytenr + offset +
6678                     btrfs_file_extent_num_bytes(leaf, fi) >
6679                     entry->bytenr + entry->bytes) {
6680                         fprintf(stderr, "Ref is past the entry end, please "
6681                                 "take a btrfs-image of this file system and "
6682                                 "send it to a btrfs developer, ref %Lu\n",
6683                                 dback->disk_bytenr);
6684                         ret = -EINVAL;
6685                         goto out;
6686                 }
6687                 offset += off_diff;
6688                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6689                 btrfs_set_file_extent_offset(leaf, fi, offset);
6690         } else if (dback->disk_bytenr < entry->bytenr) {
6691                 u64 offset;
6692
6693                 offset = btrfs_file_extent_offset(leaf, fi);
6694                 if (dback->disk_bytenr + offset < entry->bytenr) {
6695                         fprintf(stderr, "Ref is before the entry start, please"
6696                                 " take a btrfs-image of this file system and "
6697                                 "send it to a btrfs developer, ref %Lu\n",
6698                                 dback->disk_bytenr);
6699                         ret = -EINVAL;
6700                         goto out;
6701                 }
6702
6703                 offset += dback->disk_bytenr;
6704                 offset -= entry->bytenr;
6705                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6706                 btrfs_set_file_extent_offset(leaf, fi, offset);
6707         }
6708
6709         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6710
6711         /*
6712          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6713          * only do this if we aren't using compression, otherwise it's a
6714          * trickier case.
6715          */
6716         if (!btrfs_file_extent_compression(leaf, fi))
6717                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6718         else
6719                 printf("ram bytes may be wrong?\n");
6720         btrfs_mark_buffer_dirty(leaf);
6721 out:
6722         err = btrfs_commit_transaction(trans, root);
6723         btrfs_release_path(path);
6724         return ret ? ret : err;
6725 }
6726
6727 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6728                            struct extent_record *rec)
6729 {
6730         struct extent_backref *back;
6731         struct data_backref *dback;
6732         struct extent_entry *entry, *best = NULL;
6733         LIST_HEAD(entries);
6734         int nr_entries = 0;
6735         int broken_entries = 0;
6736         int ret = 0;
6737         short mismatch = 0;
6738
6739         /*
6740          * Metadata is easy and the backrefs should always agree on bytenr and
6741          * size, if not we've got bigger issues.
6742          */
6743         if (rec->metadata)
6744                 return 0;
6745
6746         list_for_each_entry(back, &rec->backrefs, list) {
6747                 if (back->full_backref || !back->is_data)
6748                         continue;
6749
6750                 dback = (struct data_backref *)back;
6751
6752                 /*
6753                  * We only pay attention to backrefs that we found a real
6754                  * backref for.
6755                  */
6756                 if (dback->found_ref == 0)
6757                         continue;
6758
6759                 /*
6760                  * For now we only catch when the bytes don't match, not the
6761                  * bytenr.  We can easily do this at the same time, but I want
6762                  * to have a fs image to test on before we just add repair
6763                  * functionality willy-nilly so we know we won't screw up the
6764                  * repair.
6765                  */
6766
6767                 entry = find_entry(&entries, dback->disk_bytenr,
6768                                    dback->bytes);
6769                 if (!entry) {
6770                         entry = malloc(sizeof(struct extent_entry));
6771                         if (!entry) {
6772                                 ret = -ENOMEM;
6773                                 goto out;
6774                         }
6775                         memset(entry, 0, sizeof(*entry));
6776                         entry->bytenr = dback->disk_bytenr;
6777                         entry->bytes = dback->bytes;
6778                         list_add_tail(&entry->list, &entries);
6779                         nr_entries++;
6780                 }
6781
6782                 /*
6783                  * If we only have on entry we may think the entries agree when
6784                  * in reality they don't so we have to do some extra checking.
6785                  */
6786                 if (dback->disk_bytenr != rec->start ||
6787                     dback->bytes != rec->nr || back->broken)
6788                         mismatch = 1;
6789
6790                 if (back->broken) {
6791                         entry->broken++;
6792                         broken_entries++;
6793                 }
6794
6795                 entry->count++;
6796         }
6797
6798         /* Yay all the backrefs agree, carry on good sir */
6799         if (nr_entries <= 1 && !mismatch)
6800                 goto out;
6801
6802         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6803                 "%Lu\n", rec->start);
6804
6805         /*
6806          * First we want to see if the backrefs can agree amongst themselves who
6807          * is right, so figure out which one of the entries has the highest
6808          * count.
6809          */
6810         best = find_most_right_entry(&entries);
6811
6812         /*
6813          * Ok so we may have an even split between what the backrefs think, so
6814          * this is where we use the extent ref to see what it thinks.
6815          */
6816         if (!best) {
6817                 entry = find_entry(&entries, rec->start, rec->nr);
6818                 if (!entry && (!broken_entries || !rec->found_rec)) {
6819                         fprintf(stderr, "Backrefs don't agree with each other "
6820                                 "and extent record doesn't agree with anybody,"
6821                                 " so we can't fix bytenr %Lu bytes %Lu\n",
6822                                 rec->start, rec->nr);
6823                         ret = -EINVAL;
6824                         goto out;
6825                 } else if (!entry) {
6826                         /*
6827                          * Ok our backrefs were broken, we'll assume this is the
6828                          * correct value and add an entry for this range.
6829                          */
6830                         entry = malloc(sizeof(struct extent_entry));
6831                         if (!entry) {
6832                                 ret = -ENOMEM;
6833                                 goto out;
6834                         }
6835                         memset(entry, 0, sizeof(*entry));
6836                         entry->bytenr = rec->start;
6837                         entry->bytes = rec->nr;
6838                         list_add_tail(&entry->list, &entries);
6839                         nr_entries++;
6840                 }
6841                 entry->count++;
6842                 best = find_most_right_entry(&entries);
6843                 if (!best) {
6844                         fprintf(stderr, "Backrefs and extent record evenly "
6845                                 "split on who is right, this is going to "
6846                                 "require user input to fix bytenr %Lu bytes "
6847                                 "%Lu\n", rec->start, rec->nr);
6848                         ret = -EINVAL;
6849                         goto out;
6850                 }
6851         }
6852
6853         /*
6854          * I don't think this can happen currently as we'll abort() if we catch
6855          * this case higher up, but in case somebody removes that we still can't
6856          * deal with it properly here yet, so just bail out of that's the case.
6857          */
6858         if (best->bytenr != rec->start) {
6859                 fprintf(stderr, "Extent start and backref starts don't match, "
6860                         "please use btrfs-image on this file system and send "
6861                         "it to a btrfs developer so they can make fsck fix "
6862                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
6863                         rec->start, rec->nr);
6864                 ret = -EINVAL;
6865                 goto out;
6866         }
6867
6868         /*
6869          * Ok great we all agreed on an extent record, let's go find the real
6870          * references and fix up the ones that don't match.
6871          */
6872         list_for_each_entry(back, &rec->backrefs, list) {
6873                 if (back->full_backref || !back->is_data)
6874                         continue;
6875
6876                 dback = (struct data_backref *)back;
6877
6878                 /*
6879                  * Still ignoring backrefs that don't have a real ref attached
6880                  * to them.
6881                  */
6882                 if (dback->found_ref == 0)
6883                         continue;
6884
6885                 if (dback->bytes == best->bytes &&
6886                     dback->disk_bytenr == best->bytenr)
6887                         continue;
6888
6889                 ret = repair_ref(info, path, dback, best);
6890                 if (ret)
6891                         goto out;
6892         }
6893
6894         /*
6895          * Ok we messed with the actual refs, which means we need to drop our
6896          * entire cache and go back and rescan.  I know this is a huge pain and
6897          * adds a lot of extra work, but it's the only way to be safe.  Once all
6898          * the backrefs agree we may not need to do anything to the extent
6899          * record itself.
6900          */
6901         ret = -EAGAIN;
6902 out:
6903         while (!list_empty(&entries)) {
6904                 entry = list_entry(entries.next, struct extent_entry, list);
6905                 list_del_init(&entry->list);
6906                 free(entry);
6907         }
6908         return ret;
6909 }
6910
6911 static int process_duplicates(struct btrfs_root *root,
6912                               struct cache_tree *extent_cache,
6913                               struct extent_record *rec)
6914 {
6915         struct extent_record *good, *tmp;
6916         struct cache_extent *cache;
6917         int ret;
6918
6919         /*
6920          * If we found a extent record for this extent then return, or if we
6921          * have more than one duplicate we are likely going to need to delete
6922          * something.
6923          */
6924         if (rec->found_rec || rec->num_duplicates > 1)
6925                 return 0;
6926
6927         /* Shouldn't happen but just in case */
6928         BUG_ON(!rec->num_duplicates);
6929
6930         /*
6931          * So this happens if we end up with a backref that doesn't match the
6932          * actual extent entry.  So either the backref is bad or the extent
6933          * entry is bad.  Either way we want to have the extent_record actually
6934          * reflect what we found in the extent_tree, so we need to take the
6935          * duplicate out and use that as the extent_record since the only way we
6936          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6937          */
6938         remove_cache_extent(extent_cache, &rec->cache);
6939
6940         good = list_entry(rec->dups.next, struct extent_record, list);
6941         list_del_init(&good->list);
6942         INIT_LIST_HEAD(&good->backrefs);
6943         INIT_LIST_HEAD(&good->dups);
6944         good->cache.start = good->start;
6945         good->cache.size = good->nr;
6946         good->content_checked = 0;
6947         good->owner_ref_checked = 0;
6948         good->num_duplicates = 0;
6949         good->refs = rec->refs;
6950         list_splice_init(&rec->backrefs, &good->backrefs);
6951         while (1) {
6952                 cache = lookup_cache_extent(extent_cache, good->start,
6953                                             good->nr);
6954                 if (!cache)
6955                         break;
6956                 tmp = container_of(cache, struct extent_record, cache);
6957
6958                 /*
6959                  * If we find another overlapping extent and it's found_rec is
6960                  * set then it's a duplicate and we need to try and delete
6961                  * something.
6962                  */
6963                 if (tmp->found_rec || tmp->num_duplicates > 0) {
6964                         if (list_empty(&good->list))
6965                                 list_add_tail(&good->list,
6966                                               &duplicate_extents);
6967                         good->num_duplicates += tmp->num_duplicates + 1;
6968                         list_splice_init(&tmp->dups, &good->dups);
6969                         list_del_init(&tmp->list);
6970                         list_add_tail(&tmp->list, &good->dups);
6971                         remove_cache_extent(extent_cache, &tmp->cache);
6972                         continue;
6973                 }
6974
6975                 /*
6976                  * Ok we have another non extent item backed extent rec, so lets
6977                  * just add it to this extent and carry on like we did above.
6978                  */
6979                 good->refs += tmp->refs;
6980                 list_splice_init(&tmp->backrefs, &good->backrefs);
6981                 remove_cache_extent(extent_cache, &tmp->cache);
6982                 free(tmp);
6983         }
6984         ret = insert_cache_extent(extent_cache, &good->cache);
6985         BUG_ON(ret);
6986         free(rec);
6987         return good->num_duplicates ? 0 : 1;
6988 }
6989
6990 static int delete_duplicate_records(struct btrfs_root *root,
6991                                     struct extent_record *rec)
6992 {
6993         struct btrfs_trans_handle *trans;
6994         LIST_HEAD(delete_list);
6995         struct btrfs_path *path;
6996         struct extent_record *tmp, *good, *n;
6997         int nr_del = 0;
6998         int ret = 0, err;
6999         struct btrfs_key key;
7000
7001         path = btrfs_alloc_path();
7002         if (!path) {
7003                 ret = -ENOMEM;
7004                 goto out;
7005         }
7006
7007         good = rec;
7008         /* Find the record that covers all of the duplicates. */
7009         list_for_each_entry(tmp, &rec->dups, list) {
7010                 if (good->start < tmp->start)
7011                         continue;
7012                 if (good->nr > tmp->nr)
7013                         continue;
7014
7015                 if (tmp->start + tmp->nr < good->start + good->nr) {
7016                         fprintf(stderr, "Ok we have overlapping extents that "
7017                                 "aren't completely covered by eachother, this "
7018                                 "is going to require more careful thought.  "
7019                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7020                                 tmp->start, tmp->nr, good->start, good->nr);
7021                         abort();
7022                 }
7023                 good = tmp;
7024         }
7025
7026         if (good != rec)
7027                 list_add_tail(&rec->list, &delete_list);
7028
7029         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7030                 if (tmp == good)
7031                         continue;
7032                 list_move_tail(&tmp->list, &delete_list);
7033         }
7034
7035         root = root->fs_info->extent_root;
7036         trans = btrfs_start_transaction(root, 1);
7037         if (IS_ERR(trans)) {
7038                 ret = PTR_ERR(trans);
7039                 goto out;
7040         }
7041
7042         list_for_each_entry(tmp, &delete_list, list) {
7043                 if (tmp->found_rec == 0)
7044                         continue;
7045                 key.objectid = tmp->start;
7046                 key.type = BTRFS_EXTENT_ITEM_KEY;
7047                 key.offset = tmp->nr;
7048
7049                 /* Shouldn't happen but just in case */
7050                 if (tmp->metadata) {
7051                         fprintf(stderr, "Well this shouldn't happen, extent "
7052                                 "record overlaps but is metadata? "
7053                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7054                         abort();
7055                 }
7056
7057                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7058                 if (ret) {
7059                         if (ret > 0)
7060                                 ret = -EINVAL;
7061                         break;
7062                 }
7063                 ret = btrfs_del_item(trans, root, path);
7064                 if (ret)
7065                         break;
7066                 btrfs_release_path(path);
7067                 nr_del++;
7068         }
7069         err = btrfs_commit_transaction(trans, root);
7070         if (err && !ret)
7071                 ret = err;
7072 out:
7073         while (!list_empty(&delete_list)) {
7074                 tmp = list_entry(delete_list.next, struct extent_record, list);
7075                 list_del_init(&tmp->list);
7076                 if (tmp == rec)
7077                         continue;
7078                 free(tmp);
7079         }
7080
7081         while (!list_empty(&rec->dups)) {
7082                 tmp = list_entry(rec->dups.next, struct extent_record, list);
7083                 list_del_init(&tmp->list);
7084                 free(tmp);
7085         }
7086
7087         btrfs_free_path(path);
7088
7089         if (!ret && !nr_del)
7090                 rec->num_duplicates = 0;
7091
7092         return ret ? ret : nr_del;
7093 }
7094
7095 static int find_possible_backrefs(struct btrfs_fs_info *info,
7096                                   struct btrfs_path *path,
7097                                   struct cache_tree *extent_cache,
7098                                   struct extent_record *rec)
7099 {
7100         struct btrfs_root *root;
7101         struct extent_backref *back;
7102         struct data_backref *dback;
7103         struct cache_extent *cache;
7104         struct btrfs_file_extent_item *fi;
7105         struct btrfs_key key;
7106         u64 bytenr, bytes;
7107         int ret;
7108
7109         list_for_each_entry(back, &rec->backrefs, list) {
7110                 /* Don't care about full backrefs (poor unloved backrefs) */
7111                 if (back->full_backref || !back->is_data)
7112                         continue;
7113
7114                 dback = (struct data_backref *)back;
7115
7116                 /* We found this one, we don't need to do a lookup */
7117                 if (dback->found_ref)
7118                         continue;
7119
7120                 key.objectid = dback->root;
7121                 key.type = BTRFS_ROOT_ITEM_KEY;
7122                 key.offset = (u64)-1;
7123
7124                 root = btrfs_read_fs_root(info, &key);
7125
7126                 /* No root, definitely a bad ref, skip */
7127                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7128                         continue;
7129                 /* Other err, exit */
7130                 if (IS_ERR(root))
7131                         return PTR_ERR(root);
7132
7133                 key.objectid = dback->owner;
7134                 key.type = BTRFS_EXTENT_DATA_KEY;
7135                 key.offset = dback->offset;
7136                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7137                 if (ret) {
7138                         btrfs_release_path(path);
7139                         if (ret < 0)
7140                                 return ret;
7141                         /* Didn't find it, we can carry on */
7142                         ret = 0;
7143                         continue;
7144                 }
7145
7146                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7147                                     struct btrfs_file_extent_item);
7148                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7149                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7150                 btrfs_release_path(path);
7151                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7152                 if (cache) {
7153                         struct extent_record *tmp;
7154                         tmp = container_of(cache, struct extent_record, cache);
7155
7156                         /*
7157                          * If we found an extent record for the bytenr for this
7158                          * particular backref then we can't add it to our
7159                          * current extent record.  We only want to add backrefs
7160                          * that don't have a corresponding extent item in the
7161                          * extent tree since they likely belong to this record
7162                          * and we need to fix it if it doesn't match bytenrs.
7163                          */
7164                         if  (tmp->found_rec)
7165                                 continue;
7166                 }
7167
7168                 dback->found_ref += 1;
7169                 dback->disk_bytenr = bytenr;
7170                 dback->bytes = bytes;
7171
7172                 /*
7173                  * Set this so the verify backref code knows not to trust the
7174                  * values in this backref.
7175                  */
7176                 back->broken = 1;
7177         }
7178
7179         return 0;
7180 }
7181
7182 /*
7183  * Record orphan data ref into corresponding root.
7184  *
7185  * Return 0 if the extent item contains data ref and recorded.
7186  * Return 1 if the extent item contains no useful data ref
7187  *   On that case, it may contains only shared_dataref or metadata backref
7188  *   or the file extent exists(this should be handled by the extent bytenr
7189  *   recovery routine)
7190  * Return <0 if something goes wrong.
7191  */
7192 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7193                                       struct extent_record *rec)
7194 {
7195         struct btrfs_key key;
7196         struct btrfs_root *dest_root;
7197         struct extent_backref *back;
7198         struct data_backref *dback;
7199         struct orphan_data_extent *orphan;
7200         struct btrfs_path *path;
7201         int recorded_data_ref = 0;
7202         int ret = 0;
7203
7204         if (rec->metadata)
7205                 return 1;
7206         path = btrfs_alloc_path();
7207         if (!path)
7208                 return -ENOMEM;
7209         list_for_each_entry(back, &rec->backrefs, list) {
7210                 if (back->full_backref || !back->is_data ||
7211                     !back->found_extent_tree)
7212                         continue;
7213                 dback = (struct data_backref *)back;
7214                 if (dback->found_ref)
7215                         continue;
7216                 key.objectid = dback->root;
7217                 key.type = BTRFS_ROOT_ITEM_KEY;
7218                 key.offset = (u64)-1;
7219
7220                 dest_root = btrfs_read_fs_root(fs_info, &key);
7221
7222                 /* For non-exist root we just skip it */
7223                 if (IS_ERR(dest_root) || !dest_root)
7224                         continue;
7225
7226                 key.objectid = dback->owner;
7227                 key.type = BTRFS_EXTENT_DATA_KEY;
7228                 key.offset = dback->offset;
7229
7230                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7231                 /*
7232                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7233                  * we need to record it for inode/file extent rebuild.
7234                  * For ret > 0, we record it only for file extent rebuild.
7235                  * For ret == 0, the file extent exists but only bytenr
7236                  * mismatch, let the original bytenr fix routine to handle,
7237                  * don't record it.
7238                  */
7239                 if (ret == 0)
7240                         continue;
7241                 ret = 0;
7242                 orphan = malloc(sizeof(*orphan));
7243                 if (!orphan) {
7244                         ret = -ENOMEM;
7245                         goto out;
7246                 }
7247                 INIT_LIST_HEAD(&orphan->list);
7248                 orphan->root = dback->root;
7249                 orphan->objectid = dback->owner;
7250                 orphan->offset = dback->offset;
7251                 orphan->disk_bytenr = rec->cache.start;
7252                 orphan->disk_len = rec->cache.size;
7253                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7254                 recorded_data_ref = 1;
7255         }
7256 out:
7257         btrfs_free_path(path);
7258         if (!ret)
7259                 return !recorded_data_ref;
7260         else
7261                 return ret;
7262 }
7263
7264 /*
7265  * when an incorrect extent item is found, this will delete
7266  * all of the existing entries for it and recreate them
7267  * based on what the tree scan found.
7268  */
7269 static int fixup_extent_refs(struct btrfs_fs_info *info,
7270                              struct cache_tree *extent_cache,
7271                              struct extent_record *rec)
7272 {
7273         struct btrfs_trans_handle *trans = NULL;
7274         int ret;
7275         struct btrfs_path *path;
7276         struct list_head *cur = rec->backrefs.next;
7277         struct cache_extent *cache;
7278         struct extent_backref *back;
7279         int allocated = 0;
7280         u64 flags = 0;
7281
7282         if (rec->flag_block_full_backref)
7283                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7284
7285         path = btrfs_alloc_path();
7286         if (!path)
7287                 return -ENOMEM;
7288
7289         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7290                 /*
7291                  * Sometimes the backrefs themselves are so broken they don't
7292                  * get attached to any meaningful rec, so first go back and
7293                  * check any of our backrefs that we couldn't find and throw
7294                  * them into the list if we find the backref so that
7295                  * verify_backrefs can figure out what to do.
7296                  */
7297                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7298                 if (ret < 0)
7299                         goto out;
7300         }
7301
7302         /* step one, make sure all of the backrefs agree */
7303         ret = verify_backrefs(info, path, rec);
7304         if (ret < 0)
7305                 goto out;
7306
7307         trans = btrfs_start_transaction(info->extent_root, 1);
7308         if (IS_ERR(trans)) {
7309                 ret = PTR_ERR(trans);
7310                 goto out;
7311         }
7312
7313         /* step two, delete all the existing records */
7314         ret = delete_extent_records(trans, info->extent_root, path,
7315                                     rec->start, rec->max_size);
7316
7317         if (ret < 0)
7318                 goto out;
7319
7320         /* was this block corrupt?  If so, don't add references to it */
7321         cache = lookup_cache_extent(info->corrupt_blocks,
7322                                     rec->start, rec->max_size);
7323         if (cache) {
7324                 ret = 0;
7325                 goto out;
7326         }
7327
7328         /* step three, recreate all the refs we did find */
7329         while(cur != &rec->backrefs) {
7330                 back = list_entry(cur, struct extent_backref, list);
7331                 cur = cur->next;
7332
7333                 /*
7334                  * if we didn't find any references, don't create a
7335                  * new extent record
7336                  */
7337                 if (!back->found_ref)
7338                         continue;
7339
7340                 rec->bad_full_backref = 0;
7341                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7342                 allocated = 1;
7343
7344                 if (ret)
7345                         goto out;
7346         }
7347 out:
7348         if (trans) {
7349                 int err = btrfs_commit_transaction(trans, info->extent_root);
7350                 if (!ret)
7351                         ret = err;
7352         }
7353
7354         btrfs_free_path(path);
7355         return ret;
7356 }
7357
7358 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7359                               struct extent_record *rec)
7360 {
7361         struct btrfs_trans_handle *trans;
7362         struct btrfs_root *root = fs_info->extent_root;
7363         struct btrfs_path *path;
7364         struct btrfs_extent_item *ei;
7365         struct btrfs_key key;
7366         u64 flags;
7367         int ret = 0;
7368
7369         key.objectid = rec->start;
7370         if (rec->metadata) {
7371                 key.type = BTRFS_METADATA_ITEM_KEY;
7372                 key.offset = rec->info_level;
7373         } else {
7374                 key.type = BTRFS_EXTENT_ITEM_KEY;
7375                 key.offset = rec->max_size;
7376         }
7377
7378         path = btrfs_alloc_path();
7379         if (!path)
7380                 return -ENOMEM;
7381
7382         trans = btrfs_start_transaction(root, 0);
7383         if (IS_ERR(trans)) {
7384                 btrfs_free_path(path);
7385                 return PTR_ERR(trans);
7386         }
7387
7388         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7389         if (ret < 0) {
7390                 btrfs_free_path(path);
7391                 btrfs_commit_transaction(trans, root);
7392                 return ret;
7393         } else if (ret) {
7394                 fprintf(stderr, "Didn't find extent for %llu\n",
7395                         (unsigned long long)rec->start);
7396                 btrfs_free_path(path);
7397                 btrfs_commit_transaction(trans, root);
7398                 return -ENOENT;
7399         }
7400
7401         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7402                             struct btrfs_extent_item);
7403         flags = btrfs_extent_flags(path->nodes[0], ei);
7404         if (rec->flag_block_full_backref) {
7405                 fprintf(stderr, "setting full backref on %llu\n",
7406                         (unsigned long long)key.objectid);
7407                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7408         } else {
7409                 fprintf(stderr, "clearing full backref on %llu\n",
7410                         (unsigned long long)key.objectid);
7411                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7412         }
7413         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7414         btrfs_mark_buffer_dirty(path->nodes[0]);
7415         btrfs_free_path(path);
7416         return btrfs_commit_transaction(trans, root);
7417 }
7418
7419 /* right now we only prune from the extent allocation tree */
7420 static int prune_one_block(struct btrfs_trans_handle *trans,
7421                            struct btrfs_fs_info *info,
7422                            struct btrfs_corrupt_block *corrupt)
7423 {
7424         int ret;
7425         struct btrfs_path path;
7426         struct extent_buffer *eb;
7427         u64 found;
7428         int slot;
7429         int nritems;
7430         int level = corrupt->level + 1;
7431
7432         btrfs_init_path(&path);
7433 again:
7434         /* we want to stop at the parent to our busted block */
7435         path.lowest_level = level;
7436
7437         ret = btrfs_search_slot(trans, info->extent_root,
7438                                 &corrupt->key, &path, -1, 1);
7439
7440         if (ret < 0)
7441                 goto out;
7442
7443         eb = path.nodes[level];
7444         if (!eb) {
7445                 ret = -ENOENT;
7446                 goto out;
7447         }
7448
7449         /*
7450          * hopefully the search gave us the block we want to prune,
7451          * lets try that first
7452          */
7453         slot = path.slots[level];
7454         found =  btrfs_node_blockptr(eb, slot);
7455         if (found == corrupt->cache.start)
7456                 goto del_ptr;
7457
7458         nritems = btrfs_header_nritems(eb);
7459
7460         /* the search failed, lets scan this node and hope we find it */
7461         for (slot = 0; slot < nritems; slot++) {
7462                 found =  btrfs_node_blockptr(eb, slot);
7463                 if (found == corrupt->cache.start)
7464                         goto del_ptr;
7465         }
7466         /*
7467          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7468          * to this block
7469          */
7470         if (eb == info->extent_root->node) {
7471                 ret = -ENOENT;
7472                 goto out;
7473         } else {
7474                 level++;
7475                 btrfs_release_path(&path);
7476                 goto again;
7477         }
7478
7479 del_ptr:
7480         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7481         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7482
7483 out:
7484         btrfs_release_path(&path);
7485         return ret;
7486 }
7487
7488 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7489 {
7490         struct btrfs_trans_handle *trans = NULL;
7491         struct cache_extent *cache;
7492         struct btrfs_corrupt_block *corrupt;
7493
7494         while (1) {
7495                 cache = search_cache_extent(info->corrupt_blocks, 0);
7496                 if (!cache)
7497                         break;
7498                 if (!trans) {
7499                         trans = btrfs_start_transaction(info->extent_root, 1);
7500                         if (IS_ERR(trans))
7501                                 return PTR_ERR(trans);
7502                 }
7503                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7504                 prune_one_block(trans, info, corrupt);
7505                 remove_cache_extent(info->corrupt_blocks, cache);
7506         }
7507         if (trans)
7508                 return btrfs_commit_transaction(trans, info->extent_root);
7509         return 0;
7510 }
7511
7512 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7513 {
7514         struct btrfs_block_group_cache *cache;
7515         u64 start, end;
7516         int ret;
7517
7518         while (1) {
7519                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7520                                             &start, &end, EXTENT_DIRTY);
7521                 if (ret)
7522                         break;
7523                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7524                                    GFP_NOFS);
7525         }
7526
7527         start = 0;
7528         while (1) {
7529                 cache = btrfs_lookup_first_block_group(fs_info, start);
7530                 if (!cache)
7531                         break;
7532                 if (cache->cached)
7533                         cache->cached = 0;
7534                 start = cache->key.objectid + cache->key.offset;
7535         }
7536 }
7537
7538 static int check_extent_refs(struct btrfs_root *root,
7539                              struct cache_tree *extent_cache)
7540 {
7541         struct extent_record *rec;
7542         struct cache_extent *cache;
7543         int err = 0;
7544         int ret = 0;
7545         int fixed = 0;
7546         int had_dups = 0;
7547         int recorded = 0;
7548
7549         if (repair) {
7550                 /*
7551                  * if we're doing a repair, we have to make sure
7552                  * we don't allocate from the problem extents.
7553                  * In the worst case, this will be all the
7554                  * extents in the FS
7555                  */
7556                 cache = search_cache_extent(extent_cache, 0);
7557                 while(cache) {
7558                         rec = container_of(cache, struct extent_record, cache);
7559                         set_extent_dirty(root->fs_info->excluded_extents,
7560                                          rec->start,
7561                                          rec->start + rec->max_size - 1,
7562                                          GFP_NOFS);
7563                         cache = next_cache_extent(cache);
7564                 }
7565
7566                 /* pin down all the corrupted blocks too */
7567                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7568                 while(cache) {
7569                         set_extent_dirty(root->fs_info->excluded_extents,
7570                                          cache->start,
7571                                          cache->start + cache->size - 1,
7572                                          GFP_NOFS);
7573                         cache = next_cache_extent(cache);
7574                 }
7575                 prune_corrupt_blocks(root->fs_info);
7576                 reset_cached_block_groups(root->fs_info);
7577         }
7578
7579         reset_cached_block_groups(root->fs_info);
7580
7581         /*
7582          * We need to delete any duplicate entries we find first otherwise we
7583          * could mess up the extent tree when we have backrefs that actually
7584          * belong to a different extent item and not the weird duplicate one.
7585          */
7586         while (repair && !list_empty(&duplicate_extents)) {
7587                 rec = list_entry(duplicate_extents.next, struct extent_record,
7588                                  list);
7589                 list_del_init(&rec->list);
7590
7591                 /* Sometimes we can find a backref before we find an actual
7592                  * extent, so we need to process it a little bit to see if there
7593                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7594                  * if this is a backref screwup.  If we need to delete stuff
7595                  * process_duplicates() will return 0, otherwise it will return
7596                  * 1 and we
7597                  */
7598                 if (process_duplicates(root, extent_cache, rec))
7599                         continue;
7600                 ret = delete_duplicate_records(root, rec);
7601                 if (ret < 0)
7602                         return ret;
7603                 /*
7604                  * delete_duplicate_records will return the number of entries
7605                  * deleted, so if it's greater than 0 then we know we actually
7606                  * did something and we need to remove.
7607                  */
7608                 if (ret)
7609                         had_dups = 1;
7610         }
7611
7612         if (had_dups)
7613                 return -EAGAIN;
7614
7615         while(1) {
7616                 int cur_err = 0;
7617
7618                 fixed = 0;
7619                 recorded = 0;
7620                 cache = search_cache_extent(extent_cache, 0);
7621                 if (!cache)
7622                         break;
7623                 rec = container_of(cache, struct extent_record, cache);
7624                 if (rec->num_duplicates) {
7625                         fprintf(stderr, "extent item %llu has multiple extent "
7626                                 "items\n", (unsigned long long)rec->start);
7627                         err = 1;
7628                         cur_err = 1;
7629                 }
7630
7631                 if (rec->refs != rec->extent_item_refs) {
7632                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7633                                 (unsigned long long)rec->start,
7634                                 (unsigned long long)rec->nr);
7635                         fprintf(stderr, "extent item %llu, found %llu\n",
7636                                 (unsigned long long)rec->extent_item_refs,
7637                                 (unsigned long long)rec->refs);
7638                         ret = record_orphan_data_extents(root->fs_info, rec);
7639                         if (ret < 0)
7640                                 goto repair_abort;
7641                         if (ret == 0) {
7642                                 recorded = 1;
7643                         } else {
7644                                 /*
7645                                  * we can't use the extent to repair file
7646                                  * extent, let the fallback method handle it.
7647                                  */
7648                                 if (!fixed && repair) {
7649                                         ret = fixup_extent_refs(
7650                                                         root->fs_info,
7651                                                         extent_cache, rec);
7652                                         if (ret)
7653                                                 goto repair_abort;
7654                                         fixed = 1;
7655                                 }
7656                         }
7657                         err = 1;
7658                         cur_err = 1;
7659                 }
7660                 if (all_backpointers_checked(rec, 1)) {
7661                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7662                                 (unsigned long long)rec->start,
7663                                 (unsigned long long)rec->nr);
7664
7665                         if (!fixed && !recorded && repair) {
7666                                 ret = fixup_extent_refs(root->fs_info,
7667                                                         extent_cache, rec);
7668                                 if (ret)
7669                                         goto repair_abort;
7670                                 fixed = 1;
7671                         }
7672                         cur_err = 1;
7673                         err = 1;
7674                 }
7675                 if (!rec->owner_ref_checked) {
7676                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7677                                 (unsigned long long)rec->start,
7678                                 (unsigned long long)rec->nr);
7679                         if (!fixed && !recorded && repair) {
7680                                 ret = fixup_extent_refs(root->fs_info,
7681                                                         extent_cache, rec);
7682                                 if (ret)
7683                                         goto repair_abort;
7684                                 fixed = 1;
7685                         }
7686                         err = 1;
7687                         cur_err = 1;
7688                 }
7689                 if (rec->bad_full_backref) {
7690                         fprintf(stderr, "bad full backref, on [%llu]\n",
7691                                 (unsigned long long)rec->start);
7692                         if (repair) {
7693                                 ret = fixup_extent_flags(root->fs_info, rec);
7694                                 if (ret)
7695                                         goto repair_abort;
7696                                 fixed = 1;
7697                         }
7698                         err = 1;
7699                         cur_err = 1;
7700                 }
7701                 /*
7702                  * Although it's not a extent ref's problem, we reuse this
7703                  * routine for error reporting.
7704                  * No repair function yet.
7705                  */
7706                 if (rec->crossing_stripes) {
7707                         fprintf(stderr,
7708                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7709                                 rec->start, rec->start + rec->max_size);
7710                         err = 1;
7711                         cur_err = 1;
7712                 }
7713
7714                 if (rec->wrong_chunk_type) {
7715                         fprintf(stderr,
7716                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7717                                 rec->start, rec->start + rec->max_size);
7718                         err = 1;
7719                         cur_err = 1;
7720                 }
7721
7722                 remove_cache_extent(extent_cache, cache);
7723                 free_all_extent_backrefs(rec);
7724                 if (!init_extent_tree && repair && (!cur_err || fixed))
7725                         clear_extent_dirty(root->fs_info->excluded_extents,
7726                                            rec->start,
7727                                            rec->start + rec->max_size - 1,
7728                                            GFP_NOFS);
7729                 free(rec);
7730         }
7731 repair_abort:
7732         if (repair) {
7733                 if (ret && ret != -EAGAIN) {
7734                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7735                         exit(1);
7736                 } else if (!ret) {
7737                         struct btrfs_trans_handle *trans;
7738
7739                         root = root->fs_info->extent_root;
7740                         trans = btrfs_start_transaction(root, 1);
7741                         if (IS_ERR(trans)) {
7742                                 ret = PTR_ERR(trans);
7743                                 goto repair_abort;
7744                         }
7745
7746                         btrfs_fix_block_accounting(trans, root);
7747                         ret = btrfs_commit_transaction(trans, root);
7748                         if (ret)
7749                                 goto repair_abort;
7750                 }
7751                 if (err)
7752                         fprintf(stderr, "repaired damaged extent references\n");
7753                 return ret;
7754         }
7755         return err;
7756 }
7757
7758 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7759 {
7760         u64 stripe_size;
7761
7762         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7763                 stripe_size = length;
7764                 stripe_size /= num_stripes;
7765         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7766                 stripe_size = length * 2;
7767                 stripe_size /= num_stripes;
7768         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7769                 stripe_size = length;
7770                 stripe_size /= (num_stripes - 1);
7771         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7772                 stripe_size = length;
7773                 stripe_size /= (num_stripes - 2);
7774         } else {
7775                 stripe_size = length;
7776         }
7777         return stripe_size;
7778 }
7779
7780 /*
7781  * Check the chunk with its block group/dev list ref:
7782  * Return 0 if all refs seems valid.
7783  * Return 1 if part of refs seems valid, need later check for rebuild ref
7784  * like missing block group and needs to search extent tree to rebuild them.
7785  * Return -1 if essential refs are missing and unable to rebuild.
7786  */
7787 static int check_chunk_refs(struct chunk_record *chunk_rec,
7788                             struct block_group_tree *block_group_cache,
7789                             struct device_extent_tree *dev_extent_cache,
7790                             int silent)
7791 {
7792         struct cache_extent *block_group_item;
7793         struct block_group_record *block_group_rec;
7794         struct cache_extent *dev_extent_item;
7795         struct device_extent_record *dev_extent_rec;
7796         u64 devid;
7797         u64 offset;
7798         u64 length;
7799         int metadump_v2 = 0;
7800         int i;
7801         int ret = 0;
7802
7803         block_group_item = lookup_cache_extent(&block_group_cache->tree,
7804                                                chunk_rec->offset,
7805                                                chunk_rec->length);
7806         if (block_group_item) {
7807                 block_group_rec = container_of(block_group_item,
7808                                                struct block_group_record,
7809                                                cache);
7810                 if (chunk_rec->length != block_group_rec->offset ||
7811                     chunk_rec->offset != block_group_rec->objectid ||
7812                     (!metadump_v2 &&
7813                      chunk_rec->type_flags != block_group_rec->flags)) {
7814                         if (!silent)
7815                                 fprintf(stderr,
7816                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7817                                         chunk_rec->objectid,
7818                                         chunk_rec->type,
7819                                         chunk_rec->offset,
7820                                         chunk_rec->length,
7821                                         chunk_rec->offset,
7822                                         chunk_rec->type_flags,
7823                                         block_group_rec->objectid,
7824                                         block_group_rec->type,
7825                                         block_group_rec->offset,
7826                                         block_group_rec->offset,
7827                                         block_group_rec->objectid,
7828                                         block_group_rec->flags);
7829                         ret = -1;
7830                 } else {
7831                         list_del_init(&block_group_rec->list);
7832                         chunk_rec->bg_rec = block_group_rec;
7833                 }
7834         } else {
7835                 if (!silent)
7836                         fprintf(stderr,
7837                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7838                                 chunk_rec->objectid,
7839                                 chunk_rec->type,
7840                                 chunk_rec->offset,
7841                                 chunk_rec->length,
7842                                 chunk_rec->offset,
7843                                 chunk_rec->type_flags);
7844                 ret = 1;
7845         }
7846
7847         if (metadump_v2)
7848                 return ret;
7849
7850         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7851                                     chunk_rec->num_stripes);
7852         for (i = 0; i < chunk_rec->num_stripes; ++i) {
7853                 devid = chunk_rec->stripes[i].devid;
7854                 offset = chunk_rec->stripes[i].offset;
7855                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7856                                                        devid, offset, length);
7857                 if (dev_extent_item) {
7858                         dev_extent_rec = container_of(dev_extent_item,
7859                                                 struct device_extent_record,
7860                                                 cache);
7861                         if (dev_extent_rec->objectid != devid ||
7862                             dev_extent_rec->offset != offset ||
7863                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
7864                             dev_extent_rec->length != length) {
7865                                 if (!silent)
7866                                         fprintf(stderr,
7867                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7868                                                 chunk_rec->objectid,
7869                                                 chunk_rec->type,
7870                                                 chunk_rec->offset,
7871                                                 chunk_rec->stripes[i].devid,
7872                                                 chunk_rec->stripes[i].offset,
7873                                                 dev_extent_rec->objectid,
7874                                                 dev_extent_rec->offset,
7875                                                 dev_extent_rec->length);
7876                                 ret = -1;
7877                         } else {
7878                                 list_move(&dev_extent_rec->chunk_list,
7879                                           &chunk_rec->dextents);
7880                         }
7881                 } else {
7882                         if (!silent)
7883                                 fprintf(stderr,
7884                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7885                                         chunk_rec->objectid,
7886                                         chunk_rec->type,
7887                                         chunk_rec->offset,
7888                                         chunk_rec->stripes[i].devid,
7889                                         chunk_rec->stripes[i].offset);
7890                         ret = -1;
7891                 }
7892         }
7893         return ret;
7894 }
7895
7896 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7897 int check_chunks(struct cache_tree *chunk_cache,
7898                  struct block_group_tree *block_group_cache,
7899                  struct device_extent_tree *dev_extent_cache,
7900                  struct list_head *good, struct list_head *bad,
7901                  struct list_head *rebuild, int silent)
7902 {
7903         struct cache_extent *chunk_item;
7904         struct chunk_record *chunk_rec;
7905         struct block_group_record *bg_rec;
7906         struct device_extent_record *dext_rec;
7907         int err;
7908         int ret = 0;
7909
7910         chunk_item = first_cache_extent(chunk_cache);
7911         while (chunk_item) {
7912                 chunk_rec = container_of(chunk_item, struct chunk_record,
7913                                          cache);
7914                 err = check_chunk_refs(chunk_rec, block_group_cache,
7915                                        dev_extent_cache, silent);
7916                 if (err < 0)
7917                         ret = err;
7918                 if (err == 0 && good)
7919                         list_add_tail(&chunk_rec->list, good);
7920                 if (err > 0 && rebuild)
7921                         list_add_tail(&chunk_rec->list, rebuild);
7922                 if (err < 0 && bad)
7923                         list_add_tail(&chunk_rec->list, bad);
7924                 chunk_item = next_cache_extent(chunk_item);
7925         }
7926
7927         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7928                 if (!silent)
7929                         fprintf(stderr,
7930                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7931                                 bg_rec->objectid,
7932                                 bg_rec->offset,
7933                                 bg_rec->flags);
7934                 if (!ret)
7935                         ret = 1;
7936         }
7937
7938         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7939                             chunk_list) {
7940                 if (!silent)
7941                         fprintf(stderr,
7942                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7943                                 dext_rec->objectid,
7944                                 dext_rec->offset,
7945                                 dext_rec->length);
7946                 if (!ret)
7947                         ret = 1;
7948         }
7949         return ret;
7950 }
7951
7952
7953 static int check_device_used(struct device_record *dev_rec,
7954                              struct device_extent_tree *dext_cache)
7955 {
7956         struct cache_extent *cache;
7957         struct device_extent_record *dev_extent_rec;
7958         u64 total_byte = 0;
7959
7960         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7961         while (cache) {
7962                 dev_extent_rec = container_of(cache,
7963                                               struct device_extent_record,
7964                                               cache);
7965                 if (dev_extent_rec->objectid != dev_rec->devid)
7966                         break;
7967
7968                 list_del_init(&dev_extent_rec->device_list);
7969                 total_byte += dev_extent_rec->length;
7970                 cache = next_cache_extent(cache);
7971         }
7972
7973         if (total_byte != dev_rec->byte_used) {
7974                 fprintf(stderr,
7975                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7976                         total_byte, dev_rec->byte_used, dev_rec->objectid,
7977                         dev_rec->type, dev_rec->offset);
7978                 return -1;
7979         } else {
7980                 return 0;
7981         }
7982 }
7983
7984 /* check btrfs_dev_item -> btrfs_dev_extent */
7985 static int check_devices(struct rb_root *dev_cache,
7986                          struct device_extent_tree *dev_extent_cache)
7987 {
7988         struct rb_node *dev_node;
7989         struct device_record *dev_rec;
7990         struct device_extent_record *dext_rec;
7991         int err;
7992         int ret = 0;
7993
7994         dev_node = rb_first(dev_cache);
7995         while (dev_node) {
7996                 dev_rec = container_of(dev_node, struct device_record, node);
7997                 err = check_device_used(dev_rec, dev_extent_cache);
7998                 if (err)
7999                         ret = err;
8000
8001                 dev_node = rb_next(dev_node);
8002         }
8003         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8004                             device_list) {
8005                 fprintf(stderr,
8006                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8007                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8008                 if (!ret)
8009                         ret = 1;
8010         }
8011         return ret;
8012 }
8013
8014 static int add_root_item_to_list(struct list_head *head,
8015                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8016                                   u8 level, u8 drop_level,
8017                                   int level_size, struct btrfs_key *drop_key)
8018 {
8019
8020         struct root_item_record *ri_rec;
8021         ri_rec = malloc(sizeof(*ri_rec));
8022         if (!ri_rec)
8023                 return -ENOMEM;
8024         ri_rec->bytenr = bytenr;
8025         ri_rec->objectid = objectid;
8026         ri_rec->level = level;
8027         ri_rec->level_size = level_size;
8028         ri_rec->drop_level = drop_level;
8029         ri_rec->last_snapshot = last_snapshot;
8030         if (drop_key)
8031                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8032         list_add_tail(&ri_rec->list, head);
8033
8034         return 0;
8035 }
8036
8037 static void free_root_item_list(struct list_head *list)
8038 {
8039         struct root_item_record *ri_rec;
8040
8041         while (!list_empty(list)) {
8042                 ri_rec = list_first_entry(list, struct root_item_record,
8043                                           list);
8044                 list_del_init(&ri_rec->list);
8045                 free(ri_rec);
8046         }
8047 }
8048
8049 static int deal_root_from_list(struct list_head *list,
8050                                struct btrfs_root *root,
8051                                struct block_info *bits,
8052                                int bits_nr,
8053                                struct cache_tree *pending,
8054                                struct cache_tree *seen,
8055                                struct cache_tree *reada,
8056                                struct cache_tree *nodes,
8057                                struct cache_tree *extent_cache,
8058                                struct cache_tree *chunk_cache,
8059                                struct rb_root *dev_cache,
8060                                struct block_group_tree *block_group_cache,
8061                                struct device_extent_tree *dev_extent_cache)
8062 {
8063         int ret = 0;
8064         u64 last;
8065
8066         while (!list_empty(list)) {
8067                 struct root_item_record *rec;
8068                 struct extent_buffer *buf;
8069                 rec = list_entry(list->next,
8070                                  struct root_item_record, list);
8071                 last = 0;
8072                 buf = read_tree_block(root->fs_info->tree_root,
8073                                       rec->bytenr, rec->level_size, 0);
8074                 if (!extent_buffer_uptodate(buf)) {
8075                         free_extent_buffer(buf);
8076                         ret = -EIO;
8077                         break;
8078                 }
8079                 add_root_to_pending(buf, extent_cache, pending,
8080                                     seen, nodes, rec->objectid);
8081                 /*
8082                  * To rebuild extent tree, we need deal with snapshot
8083                  * one by one, otherwise we deal with node firstly which
8084                  * can maximize readahead.
8085                  */
8086                 while (1) {
8087                         ret = run_next_block(root, bits, bits_nr, &last,
8088                                              pending, seen, reada, nodes,
8089                                              extent_cache, chunk_cache,
8090                                              dev_cache, block_group_cache,
8091                                              dev_extent_cache, rec);
8092                         if (ret != 0)
8093                                 break;
8094                 }
8095                 free_extent_buffer(buf);
8096                 list_del(&rec->list);
8097                 free(rec);
8098                 if (ret < 0)
8099                         break;
8100         }
8101         while (ret >= 0) {
8102                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8103                                      reada, nodes, extent_cache, chunk_cache,
8104                                      dev_cache, block_group_cache,
8105                                      dev_extent_cache, NULL);
8106                 if (ret != 0) {
8107                         if (ret > 0)
8108                                 ret = 0;
8109                         break;
8110                 }
8111         }
8112         return ret;
8113 }
8114
8115 static int check_chunks_and_extents(struct btrfs_root *root)
8116 {
8117         struct rb_root dev_cache;
8118         struct cache_tree chunk_cache;
8119         struct block_group_tree block_group_cache;
8120         struct device_extent_tree dev_extent_cache;
8121         struct cache_tree extent_cache;
8122         struct cache_tree seen;
8123         struct cache_tree pending;
8124         struct cache_tree reada;
8125         struct cache_tree nodes;
8126         struct extent_io_tree excluded_extents;
8127         struct cache_tree corrupt_blocks;
8128         struct btrfs_path path;
8129         struct btrfs_key key;
8130         struct btrfs_key found_key;
8131         int ret, err = 0;
8132         struct block_info *bits;
8133         int bits_nr;
8134         struct extent_buffer *leaf;
8135         int slot;
8136         struct btrfs_root_item ri;
8137         struct list_head dropping_trees;
8138         struct list_head normal_trees;
8139         struct btrfs_root *root1;
8140         u64 objectid;
8141         u32 level_size;
8142         u8 level;
8143
8144         dev_cache = RB_ROOT;
8145         cache_tree_init(&chunk_cache);
8146         block_group_tree_init(&block_group_cache);
8147         device_extent_tree_init(&dev_extent_cache);
8148
8149         cache_tree_init(&extent_cache);
8150         cache_tree_init(&seen);
8151         cache_tree_init(&pending);
8152         cache_tree_init(&nodes);
8153         cache_tree_init(&reada);
8154         cache_tree_init(&corrupt_blocks);
8155         extent_io_tree_init(&excluded_extents);
8156         INIT_LIST_HEAD(&dropping_trees);
8157         INIT_LIST_HEAD(&normal_trees);
8158
8159         if (repair) {
8160                 root->fs_info->excluded_extents = &excluded_extents;
8161                 root->fs_info->fsck_extent_cache = &extent_cache;
8162                 root->fs_info->free_extent_hook = free_extent_hook;
8163                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8164         }
8165
8166         bits_nr = 1024;
8167         bits = malloc(bits_nr * sizeof(struct block_info));
8168         if (!bits) {
8169                 perror("malloc");
8170                 exit(1);
8171         }
8172
8173         if (ctx.progress_enabled) {
8174                 ctx.tp = TASK_EXTENTS;
8175                 task_start(ctx.info);
8176         }
8177
8178 again:
8179         root1 = root->fs_info->tree_root;
8180         level = btrfs_header_level(root1->node);
8181         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8182                                     root1->node->start, 0, level, 0,
8183                                     btrfs_level_size(root1, level), NULL);
8184         if (ret < 0)
8185                 goto out;
8186         root1 = root->fs_info->chunk_root;
8187         level = btrfs_header_level(root1->node);
8188         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8189                                     root1->node->start, 0, level, 0,
8190                                     btrfs_level_size(root1, level), NULL);
8191         if (ret < 0)
8192                 goto out;
8193         btrfs_init_path(&path);
8194         key.offset = 0;
8195         key.objectid = 0;
8196         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8197         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8198                                         &key, &path, 0, 0);
8199         if (ret < 0)
8200                 goto out;
8201         while(1) {
8202                 leaf = path.nodes[0];
8203                 slot = path.slots[0];
8204                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8205                         ret = btrfs_next_leaf(root, &path);
8206                         if (ret != 0)
8207                                 break;
8208                         leaf = path.nodes[0];
8209                         slot = path.slots[0];
8210                 }
8211                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8212                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8213                         unsigned long offset;
8214                         u64 last_snapshot;
8215
8216                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8217                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8218                         last_snapshot = btrfs_root_last_snapshot(&ri);
8219                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8220                                 level = btrfs_root_level(&ri);
8221                                 level_size = btrfs_level_size(root, level);
8222                                 ret = add_root_item_to_list(&normal_trees,
8223                                                 found_key.objectid,
8224                                                 btrfs_root_bytenr(&ri),
8225                                                 last_snapshot, level,
8226                                                 0, level_size, NULL);
8227                                 if (ret < 0)
8228                                         goto out;
8229                         } else {
8230                                 level = btrfs_root_level(&ri);
8231                                 level_size = btrfs_level_size(root, level);
8232                                 objectid = found_key.objectid;
8233                                 btrfs_disk_key_to_cpu(&found_key,
8234                                                       &ri.drop_progress);
8235                                 ret = add_root_item_to_list(&dropping_trees,
8236                                                 objectid,
8237                                                 btrfs_root_bytenr(&ri),
8238                                                 last_snapshot, level,
8239                                                 ri.drop_level,
8240                                                 level_size, &found_key);
8241                                 if (ret < 0)
8242                                         goto out;
8243                         }
8244                 }
8245                 path.slots[0]++;
8246         }
8247         btrfs_release_path(&path);
8248
8249         /*
8250          * check_block can return -EAGAIN if it fixes something, please keep
8251          * this in mind when dealing with return values from these functions, if
8252          * we get -EAGAIN we want to fall through and restart the loop.
8253          */
8254         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8255                                   &seen, &reada, &nodes, &extent_cache,
8256                                   &chunk_cache, &dev_cache, &block_group_cache,
8257                                   &dev_extent_cache);
8258         if (ret < 0) {
8259                 if (ret == -EAGAIN)
8260                         goto loop;
8261                 goto out;
8262         }
8263         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8264                                   &pending, &seen, &reada, &nodes,
8265                                   &extent_cache, &chunk_cache, &dev_cache,
8266                                   &block_group_cache, &dev_extent_cache);
8267         if (ret < 0) {
8268                 if (ret == -EAGAIN)
8269                         goto loop;
8270                 goto out;
8271         }
8272
8273         ret = check_chunks(&chunk_cache, &block_group_cache,
8274                            &dev_extent_cache, NULL, NULL, NULL, 0);
8275         if (ret) {
8276                 if (ret == -EAGAIN)
8277                         goto loop;
8278                 err = ret;
8279         }
8280
8281         ret = check_extent_refs(root, &extent_cache);
8282         if (ret < 0) {
8283                 if (ret == -EAGAIN)
8284                         goto loop;
8285                 goto out;
8286         }
8287
8288         ret = check_devices(&dev_cache, &dev_extent_cache);
8289         if (ret && err)
8290                 ret = err;
8291
8292 out:
8293         task_stop(ctx.info);
8294         if (repair) {
8295                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8296                 extent_io_tree_cleanup(&excluded_extents);
8297                 root->fs_info->fsck_extent_cache = NULL;
8298                 root->fs_info->free_extent_hook = NULL;
8299                 root->fs_info->corrupt_blocks = NULL;
8300                 root->fs_info->excluded_extents = NULL;
8301         }
8302         free(bits);
8303         free_chunk_cache_tree(&chunk_cache);
8304         free_device_cache_tree(&dev_cache);
8305         free_block_group_tree(&block_group_cache);
8306         free_device_extent_tree(&dev_extent_cache);
8307         free_extent_cache_tree(&seen);
8308         free_extent_cache_tree(&pending);
8309         free_extent_cache_tree(&reada);
8310         free_extent_cache_tree(&nodes);
8311         return ret;
8312 loop:
8313         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8314         free_extent_cache_tree(&seen);
8315         free_extent_cache_tree(&pending);
8316         free_extent_cache_tree(&reada);
8317         free_extent_cache_tree(&nodes);
8318         free_chunk_cache_tree(&chunk_cache);
8319         free_block_group_tree(&block_group_cache);
8320         free_device_cache_tree(&dev_cache);
8321         free_device_extent_tree(&dev_extent_cache);
8322         free_extent_record_cache(root->fs_info, &extent_cache);
8323         free_root_item_list(&normal_trees);
8324         free_root_item_list(&dropping_trees);
8325         extent_io_tree_cleanup(&excluded_extents);
8326         goto again;
8327 }
8328
8329 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8330                            struct btrfs_root *root, int overwrite)
8331 {
8332         struct extent_buffer *c;
8333         struct extent_buffer *old = root->node;
8334         int level;
8335         int ret;
8336         struct btrfs_disk_key disk_key = {0,0,0};
8337
8338         level = 0;
8339
8340         if (overwrite) {
8341                 c = old;
8342                 extent_buffer_get(c);
8343                 goto init;
8344         }
8345         c = btrfs_alloc_free_block(trans, root,
8346                                    btrfs_level_size(root, 0),
8347                                    root->root_key.objectid,
8348                                    &disk_key, level, 0, 0);
8349         if (IS_ERR(c)) {
8350                 c = old;
8351                 extent_buffer_get(c);
8352                 overwrite = 1;
8353         }
8354 init:
8355         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8356         btrfs_set_header_level(c, level);
8357         btrfs_set_header_bytenr(c, c->start);
8358         btrfs_set_header_generation(c, trans->transid);
8359         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8360         btrfs_set_header_owner(c, root->root_key.objectid);
8361
8362         write_extent_buffer(c, root->fs_info->fsid,
8363                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8364
8365         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8366                             btrfs_header_chunk_tree_uuid(c),
8367                             BTRFS_UUID_SIZE);
8368
8369         btrfs_mark_buffer_dirty(c);
8370         /*
8371          * this case can happen in the following case:
8372          *
8373          * 1.overwrite previous root.
8374          *
8375          * 2.reinit reloc data root, this is because we skip pin
8376          * down reloc data tree before which means we can allocate
8377          * same block bytenr here.
8378          */
8379         if (old->start == c->start) {
8380                 btrfs_set_root_generation(&root->root_item,
8381                                           trans->transid);
8382                 root->root_item.level = btrfs_header_level(root->node);
8383                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8384                                         &root->root_key, &root->root_item);
8385                 if (ret) {
8386                         free_extent_buffer(c);
8387                         return ret;
8388                 }
8389         }
8390         free_extent_buffer(old);
8391         root->node = c;
8392         add_root_to_dirty_list(root);
8393         return 0;
8394 }
8395
8396 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8397                                 struct extent_buffer *eb, int tree_root)
8398 {
8399         struct extent_buffer *tmp;
8400         struct btrfs_root_item *ri;
8401         struct btrfs_key key;
8402         u64 bytenr;
8403         u32 leafsize;
8404         int level = btrfs_header_level(eb);
8405         int nritems;
8406         int ret;
8407         int i;
8408
8409         /*
8410          * If we have pinned this block before, don't pin it again.
8411          * This can not only avoid forever loop with broken filesystem
8412          * but also give us some speedups.
8413          */
8414         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8415                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8416                 return 0;
8417
8418         btrfs_pin_extent(fs_info, eb->start, eb->len);
8419
8420         leafsize = btrfs_super_leafsize(fs_info->super_copy);
8421         nritems = btrfs_header_nritems(eb);
8422         for (i = 0; i < nritems; i++) {
8423                 if (level == 0) {
8424                         btrfs_item_key_to_cpu(eb, &key, i);
8425                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8426                                 continue;
8427                         /* Skip the extent root and reloc roots */
8428                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8429                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8430                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8431                                 continue;
8432                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8433                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8434
8435                         /*
8436                          * If at any point we start needing the real root we
8437                          * will have to build a stump root for the root we are
8438                          * in, but for now this doesn't actually use the root so
8439                          * just pass in extent_root.
8440                          */
8441                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8442                                               leafsize, 0);
8443                         if (!extent_buffer_uptodate(tmp)) {
8444                                 fprintf(stderr, "Error reading root block\n");
8445                                 return -EIO;
8446                         }
8447                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8448                         free_extent_buffer(tmp);
8449                         if (ret)
8450                                 return ret;
8451                 } else {
8452                         bytenr = btrfs_node_blockptr(eb, i);
8453
8454                         /* If we aren't the tree root don't read the block */
8455                         if (level == 1 && !tree_root) {
8456                                 btrfs_pin_extent(fs_info, bytenr, leafsize);
8457                                 continue;
8458                         }
8459
8460                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8461                                               leafsize, 0);
8462                         if (!extent_buffer_uptodate(tmp)) {
8463                                 fprintf(stderr, "Error reading tree block\n");
8464                                 return -EIO;
8465                         }
8466                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8467                         free_extent_buffer(tmp);
8468                         if (ret)
8469                                 return ret;
8470                 }
8471         }
8472
8473         return 0;
8474 }
8475
8476 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8477 {
8478         int ret;
8479
8480         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8481         if (ret)
8482                 return ret;
8483
8484         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8485 }
8486
8487 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8488 {
8489         struct btrfs_block_group_cache *cache;
8490         struct btrfs_path *path;
8491         struct extent_buffer *leaf;
8492         struct btrfs_chunk *chunk;
8493         struct btrfs_key key;
8494         int ret;
8495         u64 start;
8496
8497         path = btrfs_alloc_path();
8498         if (!path)
8499                 return -ENOMEM;
8500
8501         key.objectid = 0;
8502         key.type = BTRFS_CHUNK_ITEM_KEY;
8503         key.offset = 0;
8504
8505         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8506         if (ret < 0) {
8507                 btrfs_free_path(path);
8508                 return ret;
8509         }
8510
8511         /*
8512          * We do this in case the block groups were screwed up and had alloc
8513          * bits that aren't actually set on the chunks.  This happens with
8514          * restored images every time and could happen in real life I guess.
8515          */
8516         fs_info->avail_data_alloc_bits = 0;
8517         fs_info->avail_metadata_alloc_bits = 0;
8518         fs_info->avail_system_alloc_bits = 0;
8519
8520         /* First we need to create the in-memory block groups */
8521         while (1) {
8522                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8523                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
8524                         if (ret < 0) {
8525                                 btrfs_free_path(path);
8526                                 return ret;
8527                         }
8528                         if (ret) {
8529                                 ret = 0;
8530                                 break;
8531                         }
8532                 }
8533                 leaf = path->nodes[0];
8534                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8535                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8536                         path->slots[0]++;
8537                         continue;
8538                 }
8539
8540                 chunk = btrfs_item_ptr(leaf, path->slots[0],
8541                                        struct btrfs_chunk);
8542                 btrfs_add_block_group(fs_info, 0,
8543                                       btrfs_chunk_type(leaf, chunk),
8544                                       key.objectid, key.offset,
8545                                       btrfs_chunk_length(leaf, chunk));
8546                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8547                                  key.offset + btrfs_chunk_length(leaf, chunk),
8548                                  GFP_NOFS);
8549                 path->slots[0]++;
8550         }
8551         start = 0;
8552         while (1) {
8553                 cache = btrfs_lookup_first_block_group(fs_info, start);
8554                 if (!cache)
8555                         break;
8556                 cache->cached = 1;
8557                 start = cache->key.objectid + cache->key.offset;
8558         }
8559
8560         btrfs_free_path(path);
8561         return 0;
8562 }
8563
8564 static int reset_balance(struct btrfs_trans_handle *trans,
8565                          struct btrfs_fs_info *fs_info)
8566 {
8567         struct btrfs_root *root = fs_info->tree_root;
8568         struct btrfs_path *path;
8569         struct extent_buffer *leaf;
8570         struct btrfs_key key;
8571         int del_slot, del_nr = 0;
8572         int ret;
8573         int found = 0;
8574
8575         path = btrfs_alloc_path();
8576         if (!path)
8577                 return -ENOMEM;
8578
8579         key.objectid = BTRFS_BALANCE_OBJECTID;
8580         key.type = BTRFS_BALANCE_ITEM_KEY;
8581         key.offset = 0;
8582
8583         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8584         if (ret) {
8585                 if (ret > 0)
8586                         ret = 0;
8587                 if (!ret)
8588                         goto reinit_data_reloc;
8589                 else
8590                         goto out;
8591         }
8592
8593         ret = btrfs_del_item(trans, root, path);
8594         if (ret)
8595                 goto out;
8596         btrfs_release_path(path);
8597
8598         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8599         key.type = BTRFS_ROOT_ITEM_KEY;
8600         key.offset = 0;
8601
8602         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8603         if (ret < 0)
8604                 goto out;
8605         while (1) {
8606                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8607                         if (!found)
8608                                 break;
8609
8610                         if (del_nr) {
8611                                 ret = btrfs_del_items(trans, root, path,
8612                                                       del_slot, del_nr);
8613                                 del_nr = 0;
8614                                 if (ret)
8615                                         goto out;
8616                         }
8617                         key.offset++;
8618                         btrfs_release_path(path);
8619
8620                         found = 0;
8621                         ret = btrfs_search_slot(trans, root, &key, path,
8622                                                 -1, 1);
8623                         if (ret < 0)
8624                                 goto out;
8625                         continue;
8626                 }
8627                 found = 1;
8628                 leaf = path->nodes[0];
8629                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8630                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8631                         break;
8632                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8633                         path->slots[0]++;
8634                         continue;
8635                 }
8636                 if (!del_nr) {
8637                         del_slot = path->slots[0];
8638                         del_nr = 1;
8639                 } else {
8640                         del_nr++;
8641                 }
8642                 path->slots[0]++;
8643         }
8644
8645         if (del_nr) {
8646                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8647                 if (ret)
8648                         goto out;
8649         }
8650         btrfs_release_path(path);
8651
8652 reinit_data_reloc:
8653         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8654         key.type = BTRFS_ROOT_ITEM_KEY;
8655         key.offset = (u64)-1;
8656         root = btrfs_read_fs_root(fs_info, &key);
8657         if (IS_ERR(root)) {
8658                 fprintf(stderr, "Error reading data reloc tree\n");
8659                 ret = PTR_ERR(root);
8660                 goto out;
8661         }
8662         record_root_in_trans(trans, root);
8663         ret = btrfs_fsck_reinit_root(trans, root, 0);
8664         if (ret)
8665                 goto out;
8666         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8667 out:
8668         btrfs_free_path(path);
8669         return ret;
8670 }
8671
8672 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8673                               struct btrfs_fs_info *fs_info)
8674 {
8675         u64 start = 0;
8676         int ret;
8677
8678         /*
8679          * The only reason we don't do this is because right now we're just
8680          * walking the trees we find and pinning down their bytes, we don't look
8681          * at any of the leaves.  In order to do mixed groups we'd have to check
8682          * the leaves of any fs roots and pin down the bytes for any file
8683          * extents we find.  Not hard but why do it if we don't have to?
8684          */
8685         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8686                 fprintf(stderr, "We don't support re-initing the extent tree "
8687                         "for mixed block groups yet, please notify a btrfs "
8688                         "developer you want to do this so they can add this "
8689                         "functionality.\n");
8690                 return -EINVAL;
8691         }
8692
8693         /*
8694          * first we need to walk all of the trees except the extent tree and pin
8695          * down the bytes that are in use so we don't overwrite any existing
8696          * metadata.
8697          */
8698         ret = pin_metadata_blocks(fs_info);
8699         if (ret) {
8700                 fprintf(stderr, "error pinning down used bytes\n");
8701                 return ret;
8702         }
8703
8704         /*
8705          * Need to drop all the block groups since we're going to recreate all
8706          * of them again.
8707          */
8708         btrfs_free_block_groups(fs_info);
8709         ret = reset_block_groups(fs_info);
8710         if (ret) {
8711                 fprintf(stderr, "error resetting the block groups\n");
8712                 return ret;
8713         }
8714
8715         /* Ok we can allocate now, reinit the extent root */
8716         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8717         if (ret) {
8718                 fprintf(stderr, "extent root initialization failed\n");
8719                 /*
8720                  * When the transaction code is updated we should end the
8721                  * transaction, but for now progs only knows about commit so
8722                  * just return an error.
8723                  */
8724                 return ret;
8725         }
8726
8727         /*
8728          * Now we have all the in-memory block groups setup so we can make
8729          * allocations properly, and the metadata we care about is safe since we
8730          * pinned all of it above.
8731          */
8732         while (1) {
8733                 struct btrfs_block_group_cache *cache;
8734
8735                 cache = btrfs_lookup_first_block_group(fs_info, start);
8736                 if (!cache)
8737                         break;
8738                 start = cache->key.objectid + cache->key.offset;
8739                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8740                                         &cache->key, &cache->item,
8741                                         sizeof(cache->item));
8742                 if (ret) {
8743                         fprintf(stderr, "Error adding block group\n");
8744                         return ret;
8745                 }
8746                 btrfs_extent_post_op(trans, fs_info->extent_root);
8747         }
8748
8749         ret = reset_balance(trans, fs_info);
8750         if (ret)
8751                 fprintf(stderr, "error reseting the pending balance\n");
8752
8753         return ret;
8754 }
8755
8756 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8757 {
8758         struct btrfs_path *path;
8759         struct btrfs_trans_handle *trans;
8760         struct btrfs_key key;
8761         int ret;
8762
8763         printf("Recowing metadata block %llu\n", eb->start);
8764         key.objectid = btrfs_header_owner(eb);
8765         key.type = BTRFS_ROOT_ITEM_KEY;
8766         key.offset = (u64)-1;
8767
8768         root = btrfs_read_fs_root(root->fs_info, &key);
8769         if (IS_ERR(root)) {
8770                 fprintf(stderr, "Couldn't find owner root %llu\n",
8771                         key.objectid);
8772                 return PTR_ERR(root);
8773         }
8774
8775         path = btrfs_alloc_path();
8776         if (!path)
8777                 return -ENOMEM;
8778
8779         trans = btrfs_start_transaction(root, 1);
8780         if (IS_ERR(trans)) {
8781                 btrfs_free_path(path);
8782                 return PTR_ERR(trans);
8783         }
8784
8785         path->lowest_level = btrfs_header_level(eb);
8786         if (path->lowest_level)
8787                 btrfs_node_key_to_cpu(eb, &key, 0);
8788         else
8789                 btrfs_item_key_to_cpu(eb, &key, 0);
8790
8791         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8792         btrfs_commit_transaction(trans, root);
8793         btrfs_free_path(path);
8794         return ret;
8795 }
8796
8797 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8798 {
8799         struct btrfs_path *path;
8800         struct btrfs_trans_handle *trans;
8801         struct btrfs_key key;
8802         int ret;
8803
8804         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8805                bad->key.type, bad->key.offset);
8806         key.objectid = bad->root_id;
8807         key.type = BTRFS_ROOT_ITEM_KEY;
8808         key.offset = (u64)-1;
8809
8810         root = btrfs_read_fs_root(root->fs_info, &key);
8811         if (IS_ERR(root)) {
8812                 fprintf(stderr, "Couldn't find owner root %llu\n",
8813                         key.objectid);
8814                 return PTR_ERR(root);
8815         }
8816
8817         path = btrfs_alloc_path();
8818         if (!path)
8819                 return -ENOMEM;
8820
8821         trans = btrfs_start_transaction(root, 1);
8822         if (IS_ERR(trans)) {
8823                 btrfs_free_path(path);
8824                 return PTR_ERR(trans);
8825         }
8826
8827         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8828         if (ret) {
8829                 if (ret > 0)
8830                         ret = 0;
8831                 goto out;
8832         }
8833         ret = btrfs_del_item(trans, root, path);
8834 out:
8835         btrfs_commit_transaction(trans, root);
8836         btrfs_free_path(path);
8837         return ret;
8838 }
8839
8840 static int zero_log_tree(struct btrfs_root *root)
8841 {
8842         struct btrfs_trans_handle *trans;
8843         int ret;
8844
8845         trans = btrfs_start_transaction(root, 1);
8846         if (IS_ERR(trans)) {
8847                 ret = PTR_ERR(trans);
8848                 return ret;
8849         }
8850         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8851         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8852         ret = btrfs_commit_transaction(trans, root);
8853         return ret;
8854 }
8855
8856 static int populate_csum(struct btrfs_trans_handle *trans,
8857                          struct btrfs_root *csum_root, char *buf, u64 start,
8858                          u64 len)
8859 {
8860         u64 offset = 0;
8861         u64 sectorsize;
8862         int ret = 0;
8863
8864         while (offset < len) {
8865                 sectorsize = csum_root->sectorsize;
8866                 ret = read_extent_data(csum_root, buf, start + offset,
8867                                        &sectorsize, 0);
8868                 if (ret)
8869                         break;
8870                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8871                                             start + offset, buf, sectorsize);
8872                 if (ret)
8873                         break;
8874                 offset += sectorsize;
8875         }
8876         return ret;
8877 }
8878
8879 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8880                                       struct btrfs_root *csum_root,
8881                                       struct btrfs_root *cur_root)
8882 {
8883         struct btrfs_path *path;
8884         struct btrfs_key key;
8885         struct extent_buffer *node;
8886         struct btrfs_file_extent_item *fi;
8887         char *buf = NULL;
8888         u64 start = 0;
8889         u64 len = 0;
8890         int slot = 0;
8891         int ret = 0;
8892
8893         path = btrfs_alloc_path();
8894         if (!path)
8895                 return -ENOMEM;
8896         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8897         if (!buf) {
8898                 ret = -ENOMEM;
8899                 goto out;
8900         }
8901
8902         key.objectid = 0;
8903         key.offset = 0;
8904         key.type = 0;
8905
8906         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8907         if (ret < 0)
8908                 goto out;
8909         /* Iterate all regular file extents and fill its csum */
8910         while (1) {
8911                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8912
8913                 if (key.type != BTRFS_EXTENT_DATA_KEY)
8914                         goto next;
8915                 node = path->nodes[0];
8916                 slot = path->slots[0];
8917                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8918                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8919                         goto next;
8920                 start = btrfs_file_extent_disk_bytenr(node, fi);
8921                 len = btrfs_file_extent_disk_num_bytes(node, fi);
8922
8923                 ret = populate_csum(trans, csum_root, buf, start, len);
8924                 if (ret == -EEXIST)
8925                         ret = 0;
8926                 if (ret < 0)
8927                         goto out;
8928 next:
8929                 /*
8930                  * TODO: if next leaf is corrupted, jump to nearest next valid
8931                  * leaf.
8932                  */
8933                 ret = btrfs_next_item(cur_root, path);
8934                 if (ret < 0)
8935                         goto out;
8936                 if (ret > 0) {
8937                         ret = 0;
8938                         goto out;
8939                 }
8940         }
8941
8942 out:
8943         btrfs_free_path(path);
8944         free(buf);
8945         return ret;
8946 }
8947
8948 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8949                                   struct btrfs_root *csum_root)
8950 {
8951         struct btrfs_fs_info *fs_info = csum_root->fs_info;
8952         struct btrfs_path *path;
8953         struct btrfs_root *tree_root = fs_info->tree_root;
8954         struct btrfs_root *cur_root;
8955         struct extent_buffer *node;
8956         struct btrfs_key key;
8957         int slot = 0;
8958         int ret = 0;
8959
8960         path = btrfs_alloc_path();
8961         if (!path)
8962                 return -ENOMEM;
8963
8964         key.objectid = BTRFS_FS_TREE_OBJECTID;
8965         key.offset = 0;
8966         key.type = BTRFS_ROOT_ITEM_KEY;
8967
8968         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
8969         if (ret < 0)
8970                 goto out;
8971         if (ret > 0) {
8972                 ret = -ENOENT;
8973                 goto out;
8974         }
8975
8976         while (1) {
8977                 node = path->nodes[0];
8978                 slot = path->slots[0];
8979                 btrfs_item_key_to_cpu(node, &key, slot);
8980                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8981                         goto out;
8982                 if (key.type != BTRFS_ROOT_ITEM_KEY)
8983                         goto next;
8984                 if (!is_fstree(key.objectid))
8985                         goto next;
8986                 key.offset = (u64)-1;
8987
8988                 cur_root = btrfs_read_fs_root(fs_info, &key);
8989                 if (IS_ERR(cur_root) || !cur_root) {
8990                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8991                                 key.objectid);
8992                         goto out;
8993                 }
8994                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8995                                 cur_root);
8996                 if (ret < 0)
8997                         goto out;
8998 next:
8999                 ret = btrfs_next_item(tree_root, path);
9000                 if (ret > 0) {
9001                         ret = 0;
9002                         goto out;
9003                 }
9004                 if (ret < 0)
9005                         goto out;
9006         }
9007
9008 out:
9009         btrfs_free_path(path);
9010         return ret;
9011 }
9012
9013 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
9014                                       struct btrfs_root *csum_root)
9015 {
9016         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
9017         struct btrfs_path *path;
9018         struct btrfs_extent_item *ei;
9019         struct extent_buffer *leaf;
9020         char *buf;
9021         struct btrfs_key key;
9022         int ret;
9023
9024         path = btrfs_alloc_path();
9025         if (!path)
9026                 return -ENOMEM;
9027
9028         key.objectid = 0;
9029         key.type = BTRFS_EXTENT_ITEM_KEY;
9030         key.offset = 0;
9031
9032         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9033         if (ret < 0) {
9034                 btrfs_free_path(path);
9035                 return ret;
9036         }
9037
9038         buf = malloc(csum_root->sectorsize);
9039         if (!buf) {
9040                 btrfs_free_path(path);
9041                 return -ENOMEM;
9042         }
9043
9044         while (1) {
9045                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9046                         ret = btrfs_next_leaf(extent_root, path);
9047                         if (ret < 0)
9048                                 break;
9049                         if (ret) {
9050                                 ret = 0;
9051                                 break;
9052                         }
9053                 }
9054                 leaf = path->nodes[0];
9055
9056                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9057                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9058                         path->slots[0]++;
9059                         continue;
9060                 }
9061
9062                 ei = btrfs_item_ptr(leaf, path->slots[0],
9063                                     struct btrfs_extent_item);
9064                 if (!(btrfs_extent_flags(leaf, ei) &
9065                       BTRFS_EXTENT_FLAG_DATA)) {
9066                         path->slots[0]++;
9067                         continue;
9068                 }
9069
9070                 ret = populate_csum(trans, csum_root, buf, key.objectid,
9071                                     key.offset);
9072                 if (ret)
9073                         break;
9074                 path->slots[0]++;
9075         }
9076
9077         btrfs_free_path(path);
9078         free(buf);
9079         return ret;
9080 }
9081
/*
 * Recompute data checksums and store them in the csum tree.
 *
 * Re-initializing the extent tree wipes out all extent info, so in that case
 * the extent tree cannot be used as the source of data extents.  When
 * search_fs_tree is non-zero the fs/subvolume trees are walked instead;
 * otherwise the extent tree is used.
 *
 * Returns whatever the selected helper returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ?
                fill_csum_tree_from_fs(trans, csum_root) :
                fill_csum_tree_from_extent(trans, csum_root);
}
9098
9099 struct root_item_info {
9100         /* level of the root */
9101         u8 level;
9102         /* number of nodes at this level, must be 1 for a root */
9103         int node_count;
9104         u64 bytenr;
9105         u64 gen;
9106         struct cache_extent cache_extent;
9107 };
9108
9109 static struct cache_tree *roots_info_cache = NULL;
9110
9111 static void free_roots_info_cache(void)
9112 {
9113         if (!roots_info_cache)
9114                 return;
9115
9116         while (!cache_tree_empty(roots_info_cache)) {
9117                 struct cache_extent *entry;
9118                 struct root_item_info *rii;
9119
9120                 entry = first_cache_extent(roots_info_cache);
9121                 if (!entry)
9122                         break;
9123                 remove_cache_extent(roots_info_cache, entry);
9124                 rii = container_of(entry, struct root_item_info, cache_extent);
9125                 free(rii);
9126         }
9127
9128         free(roots_info_cache);
9129         roots_info_cache = NULL;
9130 }
9131
9132 static int build_roots_info_cache(struct btrfs_fs_info *info)
9133 {
9134         int ret = 0;
9135         struct btrfs_key key;
9136         struct extent_buffer *leaf;
9137         struct btrfs_path *path;
9138
9139         if (!roots_info_cache) {
9140                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9141                 if (!roots_info_cache)
9142                         return -ENOMEM;
9143                 cache_tree_init(roots_info_cache);
9144         }
9145
9146         path = btrfs_alloc_path();
9147         if (!path)
9148                 return -ENOMEM;
9149
9150         key.objectid = 0;
9151         key.type = BTRFS_EXTENT_ITEM_KEY;
9152         key.offset = 0;
9153
9154         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9155         if (ret < 0)
9156                 goto out;
9157         leaf = path->nodes[0];
9158
9159         while (1) {
9160                 struct btrfs_key found_key;
9161                 struct btrfs_extent_item *ei;
9162                 struct btrfs_extent_inline_ref *iref;
9163                 int slot = path->slots[0];
9164                 int type;
9165                 u64 flags;
9166                 u64 root_id;
9167                 u8 level;
9168                 struct cache_extent *entry;
9169                 struct root_item_info *rii;
9170
9171                 if (slot >= btrfs_header_nritems(leaf)) {
9172                         ret = btrfs_next_leaf(info->extent_root, path);
9173                         if (ret < 0) {
9174                                 break;
9175                         } else if (ret) {
9176                                 ret = 0;
9177                                 break;
9178                         }
9179                         leaf = path->nodes[0];
9180                         slot = path->slots[0];
9181                 }
9182
9183                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9184
9185                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9186                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9187                         goto next;
9188
9189                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9190                 flags = btrfs_extent_flags(leaf, ei);
9191
9192                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9193                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9194                         goto next;
9195
9196                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9197                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9198                         level = found_key.offset;
9199                 } else {
9200                         struct btrfs_tree_block_info *info;
9201
9202                         info = (struct btrfs_tree_block_info *)(ei + 1);
9203                         iref = (struct btrfs_extent_inline_ref *)(info + 1);
9204                         level = btrfs_tree_block_level(leaf, info);
9205                 }
9206
9207                 /*
9208                  * For a root extent, it must be of the following type and the
9209                  * first (and only one) iref in the item.
9210                  */
9211                 type = btrfs_extent_inline_ref_type(leaf, iref);
9212                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9213                         goto next;
9214
9215                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9216                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9217                 if (!entry) {
9218                         rii = malloc(sizeof(struct root_item_info));
9219                         if (!rii) {
9220                                 ret = -ENOMEM;
9221                                 goto out;
9222                         }
9223                         rii->cache_extent.start = root_id;
9224                         rii->cache_extent.size = 1;
9225                         rii->level = (u8)-1;
9226                         entry = &rii->cache_extent;
9227                         ret = insert_cache_extent(roots_info_cache, entry);
9228                         ASSERT(ret == 0);
9229                 } else {
9230                         rii = container_of(entry, struct root_item_info,
9231                                            cache_extent);
9232                 }
9233
9234                 ASSERT(rii->cache_extent.start == root_id);
9235                 ASSERT(rii->cache_extent.size == 1);
9236
9237                 if (level > rii->level || rii->level == (u8)-1) {
9238                         rii->level = level;
9239                         rii->bytenr = found_key.objectid;
9240                         rii->gen = btrfs_extent_generation(leaf, ei);
9241                         rii->node_count = 1;
9242                 } else if (level == rii->level) {
9243                         rii->node_count++;
9244                 }
9245 next:
9246                 path->slots[0]++;
9247         }
9248
9249 out:
9250         btrfs_free_path(path);
9251
9252         return ret;
9253 }
9254
9255 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9256                                   struct btrfs_path *path,
9257                                   const struct btrfs_key *root_key,
9258                                   const int read_only_mode)
9259 {
9260         const u64 root_id = root_key->objectid;
9261         struct cache_extent *entry;
9262         struct root_item_info *rii;
9263         struct btrfs_root_item ri;
9264         unsigned long offset;
9265
9266         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9267         if (!entry) {
9268                 fprintf(stderr,
9269                         "Error: could not find extent items for root %llu\n",
9270                         root_key->objectid);
9271                 return -ENOENT;
9272         }
9273
9274         rii = container_of(entry, struct root_item_info, cache_extent);
9275         ASSERT(rii->cache_extent.start == root_id);
9276         ASSERT(rii->cache_extent.size == 1);
9277
9278         if (rii->node_count != 1) {
9279                 fprintf(stderr,
9280                         "Error: could not find btree root extent for root %llu\n",
9281                         root_id);
9282                 return -ENOENT;
9283         }
9284
9285         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9286         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9287
9288         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9289             btrfs_root_level(&ri) != rii->level ||
9290             btrfs_root_generation(&ri) != rii->gen) {
9291
9292                 /*
9293                  * If we're in repair mode but our caller told us to not update
9294                  * the root item, i.e. just check if it needs to be updated, don't
9295                  * print this message, since the caller will call us again shortly
9296                  * for the same root item without read only mode (the caller will
9297                  * open a transaction first).
9298                  */
9299                 if (!(read_only_mode && repair))
9300                         fprintf(stderr,
9301                                 "%sroot item for root %llu,"
9302                                 " current bytenr %llu, current gen %llu, current level %u,"
9303                                 " new bytenr %llu, new gen %llu, new level %u\n",
9304                                 (read_only_mode ? "" : "fixing "),
9305                                 root_id,
9306                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9307                                 btrfs_root_level(&ri),
9308                                 rii->bytenr, rii->gen, rii->level);
9309
9310                 if (btrfs_root_generation(&ri) > rii->gen) {
9311                         fprintf(stderr,
9312                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9313                                 root_id, btrfs_root_generation(&ri), rii->gen);
9314                         return -EINVAL;
9315                 }
9316
9317                 if (!read_only_mode) {
9318                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9319                         btrfs_set_root_level(&ri, rii->level);
9320                         btrfs_set_root_generation(&ri, rii->gen);
9321                         write_extent_buffer(path->nodes[0], &ri,
9322                                             offset, sizeof(ri));
9323                 }
9324
9325                 return 1;
9326         }
9327
9328         return 0;
9329 }
9330
9331 /*
9332  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9333  * caused read-only snapshots to be corrupted if they were created at a moment
9334  * when the source subvolume/snapshot had orphan items. The issue was that the
9335  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9336  * node instead of the post orphan cleanup root node.
9337  * So this function, and its callees, just detects and fixes those cases. Even
9338  * though the regression was for read-only snapshots, this function applies to
9339  * any snapshot/subvolume root.
9340  * This must be run before any other repair code - not doing it so, makes other
9341  * repair code delete or modify backrefs in the extent tree for example, which
9342  * will result in an inconsistent fs after repairing the root items.
9343  */
9344 static int repair_root_items(struct btrfs_fs_info *info)
9345 {
9346         struct btrfs_path *path = NULL;
9347         struct btrfs_key key;
9348         struct extent_buffer *leaf;
9349         struct btrfs_trans_handle *trans = NULL;
9350         int ret = 0;
9351         int bad_roots = 0;
9352         int need_trans = 0;
9353
9354         ret = build_roots_info_cache(info);
9355         if (ret)
9356                 goto out;
9357
9358         path = btrfs_alloc_path();
9359         if (!path) {
9360                 ret = -ENOMEM;
9361                 goto out;
9362         }
9363
9364         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9365         key.type = BTRFS_ROOT_ITEM_KEY;
9366         key.offset = 0;
9367
9368 again:
9369         /*
9370          * Avoid opening and committing transactions if a leaf doesn't have
9371          * any root items that need to be fixed, so that we avoid rotating
9372          * backup roots unnecessarily.
9373          */
9374         if (need_trans) {
9375                 trans = btrfs_start_transaction(info->tree_root, 1);
9376                 if (IS_ERR(trans)) {
9377                         ret = PTR_ERR(trans);
9378                         goto out;
9379                 }
9380         }
9381
9382         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9383                                 0, trans ? 1 : 0);
9384         if (ret < 0)
9385                 goto out;
9386         leaf = path->nodes[0];
9387
9388         while (1) {
9389                 struct btrfs_key found_key;
9390
9391                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9392                         int no_more_keys = find_next_key(path, &key);
9393
9394                         btrfs_release_path(path);
9395                         if (trans) {
9396                                 ret = btrfs_commit_transaction(trans,
9397                                                                info->tree_root);
9398                                 trans = NULL;
9399                                 if (ret < 0)
9400                                         goto out;
9401                         }
9402                         need_trans = 0;
9403                         if (no_more_keys)
9404                                 break;
9405                         goto again;
9406                 }
9407
9408                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9409
9410                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9411                         goto next;
9412                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9413                         goto next;
9414
9415                 ret = maybe_repair_root_item(info, path, &found_key,
9416                                              trans ? 0 : 1);
9417                 if (ret < 0)
9418                         goto out;
9419                 if (ret) {
9420                         if (!trans && repair) {
9421                                 need_trans = 1;
9422                                 key = found_key;
9423                                 btrfs_release_path(path);
9424                                 goto again;
9425                         }
9426                         bad_roots++;
9427                 }
9428 next:
9429                 path->slots[0]++;
9430         }
9431         ret = 0;
9432 out:
9433         free_roots_info_cache();
9434         btrfs_free_path(path);
9435         if (trans)
9436                 btrfs_commit_transaction(trans, info->tree_root);
9437         if (ret < 0)
9438                 return ret;
9439
9440         return bad_roots;
9441 }
9442
/*
 * Help text for "btrfs check", passed to usage().  The array is terminated
 * by a NULL entry.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found.",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "-p|--progress               indicate progress",
        NULL
};
9465
9466 int cmd_check(int argc, char **argv)
9467 {
9468         struct cache_tree root_cache;
9469         struct btrfs_root *root;
9470         struct btrfs_fs_info *info;
9471         u64 bytenr = 0;
9472         u64 subvolid = 0;
9473         u64 tree_root_bytenr = 0;
9474         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9475         int ret;
9476         u64 num;
9477         int init_csum_tree = 0;
9478         int readonly = 0;
9479         int qgroup_report = 0;
9480         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9481
9482         while(1) {
9483                 int c;
9484                 enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT,
9485                         OPT_CHECK_CSUM, OPT_READONLY };
9486                 static const struct option long_options[] = {
9487                         { "super", required_argument, NULL, 's' },
9488                         { "repair", no_argument, NULL, OPT_REPAIR },
9489                         { "readonly", no_argument, NULL, OPT_READONLY },
9490                         { "init-csum-tree", no_argument, NULL, OPT_INIT_CSUM },
9491                         { "init-extent-tree", no_argument, NULL, OPT_INIT_EXTENT },
9492                         { "check-data-csum", no_argument, NULL, OPT_CHECK_CSUM },
9493                         { "backup", no_argument, NULL, 'b' },
9494                         { "subvol-extents", required_argument, NULL, 'E' },
9495                         { "qgroup-report", no_argument, NULL, 'Q' },
9496                         { "tree-root", required_argument, NULL, 'r' },
9497                         { "progress", no_argument, NULL, 'p' },
9498                         { NULL, 0, NULL, 0}
9499                 };
9500
9501                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9502                 if (c < 0)
9503                         break;
9504                 switch(c) {
9505                         case 'a': /* ignored */ break;
9506                         case 'b':
9507                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9508                                 break;
9509                         case 's':
9510                                 num = arg_strtou64(optarg);
9511                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9512                                         fprintf(stderr,
9513                                                 "ERROR: super mirror should be less than: %d\n",
9514                                                 BTRFS_SUPER_MIRROR_MAX);
9515                                         exit(1);
9516                                 }
9517                                 bytenr = btrfs_sb_offset(((int)num));
9518                                 printf("using SB copy %llu, bytenr %llu\n", num,
9519                                        (unsigned long long)bytenr);
9520                                 break;
9521                         case 'Q':
9522                                 qgroup_report = 1;
9523                                 break;
9524                         case 'E':
9525                                 subvolid = arg_strtou64(optarg);
9526                                 break;
9527                         case 'r':
9528                                 tree_root_bytenr = arg_strtou64(optarg);
9529                                 break;
9530                         case 'p':
9531                                 ctx.progress_enabled = true;
9532                                 break;
9533                         case '?':
9534                         case 'h':
9535                                 usage(cmd_check_usage);
9536                         case OPT_REPAIR:
9537                                 printf("enabling repair mode\n");
9538                                 repair = 1;
9539                                 ctree_flags |= OPEN_CTREE_WRITES;
9540                                 break;
9541                         case OPT_READONLY:
9542                                 readonly = 1;
9543                                 break;
9544                         case OPT_INIT_CSUM:
9545                                 printf("Creating a new CRC tree\n");
9546                                 init_csum_tree = 1;
9547                                 repair = 1;
9548                                 ctree_flags |= OPEN_CTREE_WRITES;
9549                                 break;
9550                         case OPT_INIT_EXTENT:
9551                                 init_extent_tree = 1;
9552                                 ctree_flags |= (OPEN_CTREE_WRITES |
9553                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9554                                 repair = 1;
9555                                 break;
9556                         case OPT_CHECK_CSUM:
9557                                 check_data_csum = 1;
9558                                 break;
9559                 }
9560         }
9561         argc = argc - optind;
9562
9563         if (check_argc_exact(argc, 1))
9564                 usage(cmd_check_usage);
9565
9566         if (ctx.progress_enabled) {
9567                 ctx.tp = TASK_NOTHING;
9568                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9569         }
9570
9571         /* This check is the only reason for --readonly to exist */
9572         if (readonly && repair) {
9573                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9574                 exit(1);
9575         }
9576
9577         radix_tree_init();
9578         cache_tree_init(&root_cache);
9579
9580         if((ret = check_mounted(argv[optind])) < 0) {
9581                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9582                 goto err_out;
9583         } else if(ret) {
9584                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9585                 ret = -EBUSY;
9586                 goto err_out;
9587         }
9588
9589         /* only allow partial opening under repair mode */
9590         if (repair)
9591                 ctree_flags |= OPEN_CTREE_PARTIAL;
9592
9593         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9594                                   ctree_flags);
9595         if (!info) {
9596                 fprintf(stderr, "Couldn't open file system\n");
9597                 ret = -EIO;
9598                 goto err_out;
9599         }
9600
9601         global_info = info;
9602         root = info->fs_root;
9603
9604         /*
9605          * repair mode will force us to commit transaction which
9606          * will make us fail to load log tree when mounting.
9607          */
9608         if (repair && btrfs_super_log_root(info->super_copy)) {
9609                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9610                 if (!ret) {
9611                         ret = 1;
9612                         goto close_out;
9613                 }
9614                 ret = zero_log_tree(root);
9615                 if (ret) {
9616                         fprintf(stderr, "fail to zero log tree\n");
9617                         goto close_out;
9618                 }
9619         }
9620
9621         uuid_unparse(info->super_copy->fsid, uuidbuf);
9622         if (qgroup_report) {
9623                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9624                        uuidbuf);
9625                 ret = qgroup_verify_all(info);
9626                 if (ret == 0)
9627                         print_qgroup_report(1);
9628                 goto close_out;
9629         }
9630         if (subvolid) {
9631                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9632                        subvolid, argv[optind], uuidbuf);
9633                 ret = print_extent_state(info, subvolid);
9634                 goto close_out;
9635         }
9636         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9637
9638         if (!extent_buffer_uptodate(info->tree_root->node) ||
9639             !extent_buffer_uptodate(info->dev_root->node) ||
9640             !extent_buffer_uptodate(info->chunk_root->node)) {
9641                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9642                 ret = -EIO;
9643                 goto close_out;
9644         }
9645
9646         if (init_extent_tree || init_csum_tree) {
9647                 struct btrfs_trans_handle *trans;
9648
9649                 trans = btrfs_start_transaction(info->extent_root, 0);
9650                 if (IS_ERR(trans)) {
9651                         fprintf(stderr, "Error starting transaction\n");
9652                         ret = PTR_ERR(trans);
9653                         goto close_out;
9654                 }
9655
9656                 if (init_extent_tree) {
9657                         printf("Creating a new extent tree\n");
9658                         ret = reinit_extent_tree(trans, info);
9659                         if (ret)
9660                                 goto close_out;
9661                 }
9662
9663                 if (init_csum_tree) {
9664                         fprintf(stderr, "Reinit crc root\n");
9665                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9666                         if (ret) {
9667                                 fprintf(stderr, "crc root initialization failed\n");
9668                                 ret = -EIO;
9669                                 goto close_out;
9670                         }
9671
9672                         ret = fill_csum_tree(trans, info->csum_root,
9673                                              init_extent_tree);
9674                         if (ret) {
9675                                 fprintf(stderr, "crc refilling failed\n");
9676                                 return -EIO;
9677                         }
9678                 }
9679                 /*
9680                  * Ok now we commit and run the normal fsck, which will add
9681                  * extent entries for all of the items it finds.
9682                  */
9683                 ret = btrfs_commit_transaction(trans, info->extent_root);
9684                 if (ret)
9685                         goto close_out;
9686         }
9687         if (!extent_buffer_uptodate(info->extent_root->node)) {
9688                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9689                 ret = -EIO;
9690                 goto close_out;
9691         }
9692         if (!extent_buffer_uptodate(info->csum_root->node)) {
9693                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9694                 ret = -EIO;
9695                 goto close_out;
9696         }
9697
9698         if (!ctx.progress_enabled)
9699                 fprintf(stderr, "checking extents\n");
9700         ret = check_chunks_and_extents(root);
9701         if (ret)
9702                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9703
9704         ret = repair_root_items(info);
9705         if (ret < 0)
9706                 goto close_out;
9707         if (repair) {
9708                 fprintf(stderr, "Fixed %d roots.\n", ret);
9709                 ret = 0;
9710         } else if (ret > 0) {
9711                 fprintf(stderr,
9712                        "Found %d roots with an outdated root item.\n",
9713                        ret);
9714                 fprintf(stderr,
9715                         "Please run a filesystem check with the option --repair to fix them.\n");
9716                 ret = 1;
9717                 goto close_out;
9718         }
9719
9720         if (!ctx.progress_enabled)
9721                 fprintf(stderr, "checking free space cache\n");
9722         ret = check_space_cache(root);
9723         if (ret)
9724                 goto out;
9725
9726         /*
9727          * We used to have to have these hole extents in between our real
9728          * extents so if we don't have this flag set we need to make sure there
9729          * are no gaps in the file extents for inodes, otherwise we can just
9730          * ignore it when this happens.
9731          */
9732         no_holes = btrfs_fs_incompat(root->fs_info,
9733                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9734         if (!ctx.progress_enabled)
9735                 fprintf(stderr, "checking fs roots\n");
9736         ret = check_fs_roots(root, &root_cache);
9737         if (ret)
9738                 goto out;
9739
9740         fprintf(stderr, "checking csums\n");
9741         ret = check_csums(root);
9742         if (ret)
9743                 goto out;
9744
9745         fprintf(stderr, "checking root refs\n");
9746         ret = check_root_refs(root, &root_cache);
9747         if (ret)
9748                 goto out;
9749
9750         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9751                 struct extent_buffer *eb;
9752
9753                 eb = list_first_entry(&root->fs_info->recow_ebs,
9754                                       struct extent_buffer, recow);
9755                 list_del_init(&eb->recow);
9756                 ret = recow_extent_buffer(root, eb);
9757                 if (ret)
9758                         break;
9759         }
9760
9761         while (!list_empty(&delete_items)) {
9762                 struct bad_item *bad;
9763
9764                 bad = list_first_entry(&delete_items, struct bad_item, list);
9765                 list_del_init(&bad->list);
9766                 if (repair)
9767                         ret = delete_bad_item(root, bad);
9768                 free(bad);
9769         }
9770
9771         if (info->quota_enabled) {
9772                 int err;
9773                 fprintf(stderr, "checking quota groups\n");
9774                 err = qgroup_verify_all(info);
9775                 if (err)
9776                         goto out;
9777         }
9778
9779         if (!list_empty(&root->fs_info->recow_ebs)) {
9780                 fprintf(stderr, "Transid errors in file system\n");
9781                 ret = 1;
9782         }
9783 out:
9784         print_qgroup_report(0);
9785         if (found_old_backref) { /*
9786                  * there was a disk format change when mixed
9787                  * backref was in testing tree. The old format
9788                  * existed about one week.
9789                  */
9790                 printf("\n * Found old mixed backref format. "
9791                        "The old format is not supported! *"
9792                        "\n * Please mount the FS in readonly mode, "
9793                        "backup data and re-format the FS. *\n\n");
9794                 ret = 1;
9795         }
9796         printf("found %llu bytes used err is %d\n",
9797                (unsigned long long)bytes_used, ret);
9798         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9799         printf("total tree bytes: %llu\n",
9800                (unsigned long long)total_btree_bytes);
9801         printf("total fs tree bytes: %llu\n",
9802                (unsigned long long)total_fs_tree_bytes);
9803         printf("total extent tree bytes: %llu\n",
9804                (unsigned long long)total_extent_tree_bytes);
9805         printf("btree space waste bytes: %llu\n",
9806                (unsigned long long)btree_space_waste);
9807         printf("file data blocks allocated: %llu\n referenced %llu\n",
9808                 (unsigned long long)data_bytes_allocated,
9809                 (unsigned long long)data_bytes_referenced);
9810
9811         free_root_recs_tree(&root_cache);
9812 close_out:
9813         close_ctree(root);
9814 err_out:
9815         if (ctx.progress_enabled)
9816                 task_deinit(ctx.info);
9817
9818         return ret;
9819 }