btrfs-progs: catch memory allocation failure from alloc_tree_backref
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "btrfsck.h"
39 #include "qgroup-verify.h"
40 #include "rbtree-utils.h"
41 #include "backref.h"
42 #include "ulist.h"
43
44 enum task_position {
45         TASK_EXTENTS,
46         TASK_FREE_SPACE,
47         TASK_FS_ROOTS,
48         TASK_NOTHING, /* have to be the last element */
49 };
50
51 struct task_ctx {
52         int progress_enabled;
53         enum task_position tp;
54
55         struct task_info *info;
56 };
57
58 static u64 bytes_used = 0;
59 static u64 total_csum_bytes = 0;
60 static u64 total_btree_bytes = 0;
61 static u64 total_fs_tree_bytes = 0;
62 static u64 total_extent_tree_bytes = 0;
63 static u64 btree_space_waste = 0;
64 static u64 data_bytes_allocated = 0;
65 static u64 data_bytes_referenced = 0;
66 static int found_old_backref = 0;
67 static LIST_HEAD(duplicate_extents);
68 static LIST_HEAD(delete_items);
69 static int repair = 0;
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75
76 static void *print_status_check(void *p)
77 {
78         struct task_ctx *priv = p;
79         const char work_indicator[] = { '.', 'o', 'O', 'o' };
80         uint32_t count = 0;
81         static char *task_position_string[] = {
82                 "checking extents",
83                 "checking free space cache",
84                 "checking fs roots",
85         };
86
87         task_period_start(priv->info, 1000 /* 1s */);
88
89         if (priv->tp == TASK_NOTHING)
90                 return NULL;
91
92         while (1) {
93                 printf("%s [%c]\r", task_position_string[priv->tp],
94                                 work_indicator[count % 4]);
95                 count++;
96                 fflush(stdout);
97                 task_period_wait(priv->info);
98         }
99         return NULL;
100 }
101
102 static int print_status_return(void *p)
103 {
104         printf("\n");
105         fflush(stdout);
106
107         return 0;
108 }
109
110 struct extent_backref {
111         struct list_head list;
112         unsigned int is_data:1;
113         unsigned int found_extent_tree:1;
114         unsigned int full_backref:1;
115         unsigned int found_ref:1;
116         unsigned int broken:1;
117 };
118
119 struct data_backref {
120         struct extent_backref node;
121         union {
122                 u64 parent;
123                 u64 root;
124         };
125         u64 owner;
126         u64 offset;
127         u64 disk_bytenr;
128         u64 bytes;
129         u64 ram_bytes;
130         u32 num_refs;
131         u32 found_ref;
132 };
133
134 /*
135  * Much like data_backref, just removed the undetermined members
136  * and change it to use list_head.
137  * During extent scan, it is stored in root->orphan_data_extent.
138  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
139  */
140 struct orphan_data_extent {
141         struct list_head list;
142         u64 root;
143         u64 objectid;
144         u64 offset;
145         u64 disk_bytenr;
146         u64 disk_len;
147 };
148
149 struct tree_backref {
150         struct extent_backref node;
151         union {
152                 u64 parent;
153                 u64 root;
154         };
155 };
156
157 struct extent_record {
158         struct list_head backrefs;
159         struct list_head dups;
160         struct list_head list;
161         struct cache_extent cache;
162         struct btrfs_disk_key parent_key;
163         u64 start;
164         u64 max_size;
165         u64 nr;
166         u64 refs;
167         u64 extent_item_refs;
168         u64 generation;
169         u64 parent_generation;
170         u64 info_objectid;
171         u32 num_duplicates;
172         u8 info_level;
173         int flag_block_full_backref;
174         unsigned int found_rec:1;
175         unsigned int content_checked:1;
176         unsigned int owner_ref_checked:1;
177         unsigned int is_root:1;
178         unsigned int metadata:1;
179         unsigned int bad_full_backref:1;
180         unsigned int crossing_stripes:1;
181         unsigned int wrong_chunk_type:1;
182 };
183
184 struct inode_backref {
185         struct list_head list;
186         unsigned int found_dir_item:1;
187         unsigned int found_dir_index:1;
188         unsigned int found_inode_ref:1;
189         unsigned int filetype:8;
190         int errors;
191         unsigned int ref_type;
192         u64 dir;
193         u64 index;
194         u16 namelen;
195         char name[0];
196 };
197
198 struct root_item_record {
199         struct list_head list;
200         u64 objectid;
201         u64 bytenr;
202         u64 last_snapshot;
203         u8 level;
204         u8 drop_level;
205         int level_size;
206         struct btrfs_key drop_key;
207 };
208
209 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
210 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
211 #define REF_ERR_NO_INODE_REF            (1 << 2)
212 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
213 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
214 #define REF_ERR_DUP_INODE_REF           (1 << 5)
215 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
216 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
217 #define REF_ERR_NAME_TOO_LONG           (1 << 8) // 100
218 #define REF_ERR_NO_ROOT_REF             (1 << 9)
219 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
220 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
221 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
222
223 struct file_extent_hole {
224         struct rb_node node;
225         u64 start;
226         u64 len;
227 };
228
229 /* Compatible function to allow reuse of old codes */
230 static u64 first_extent_gap(struct rb_root *holes)
231 {
232         struct file_extent_hole *hole;
233
234         if (RB_EMPTY_ROOT(holes))
235                 return (u64)-1;
236
237         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
238         return hole->start;
239 }
240
241 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
242 {
243         struct file_extent_hole *hole1;
244         struct file_extent_hole *hole2;
245
246         hole1 = rb_entry(node1, struct file_extent_hole, node);
247         hole2 = rb_entry(node2, struct file_extent_hole, node);
248
249         if (hole1->start > hole2->start)
250                 return -1;
251         if (hole1->start < hole2->start)
252                 return 1;
253         /* Now hole1->start == hole2->start */
254         if (hole1->len >= hole2->len)
255                 /*
256                  * Hole 1 will be merge center
257                  * Same hole will be merged later
258                  */
259                 return -1;
260         /* Hole 2 will be merge center */
261         return 1;
262 }
263
264 /*
265  * Add a hole to the record
266  *
267  * This will do hole merge for copy_file_extent_holes(),
268  * which will ensure there won't be continuous holes.
269  */
270 static int add_file_extent_hole(struct rb_root *holes,
271                                 u64 start, u64 len)
272 {
273         struct file_extent_hole *hole;
274         struct file_extent_hole *prev = NULL;
275         struct file_extent_hole *next = NULL;
276
277         hole = malloc(sizeof(*hole));
278         if (!hole)
279                 return -ENOMEM;
280         hole->start = start;
281         hole->len = len;
282         /* Since compare will not return 0, no -EEXIST will happen */
283         rb_insert(holes, &hole->node, compare_hole);
284
285         /* simple merge with previous hole */
286         if (rb_prev(&hole->node))
287                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
288                                 node);
289         if (prev && prev->start + prev->len >= hole->start) {
290                 hole->len = hole->start + hole->len - prev->start;
291                 hole->start = prev->start;
292                 rb_erase(&prev->node, holes);
293                 free(prev);
294                 prev = NULL;
295         }
296
297         /* iterate merge with next holes */
298         while (1) {
299                 if (!rb_next(&hole->node))
300                         break;
301                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
302                                         node);
303                 if (hole->start + hole->len >= next->start) {
304                         if (hole->start + hole->len <= next->start + next->len)
305                                 hole->len = next->start + next->len -
306                                             hole->start;
307                         rb_erase(&next->node, holes);
308                         free(next);
309                         next = NULL;
310                 } else
311                         break;
312         }
313         return 0;
314 }
315
316 static int compare_hole_range(struct rb_node *node, void *data)
317 {
318         struct file_extent_hole *hole;
319         u64 start;
320
321         hole = (struct file_extent_hole *)data;
322         start = hole->start;
323
324         hole = rb_entry(node, struct file_extent_hole, node);
325         if (start < hole->start)
326                 return -1;
327         if (start >= hole->start && start < hole->start + hole->len)
328                 return 0;
329         return 1;
330 }
331
332 /*
333  * Delete a hole in the record
334  *
335  * This will do the hole split and is much restrict than add.
336  */
337 static int del_file_extent_hole(struct rb_root *holes,
338                                 u64 start, u64 len)
339 {
340         struct file_extent_hole *hole;
341         struct file_extent_hole tmp;
342         u64 prev_start = 0;
343         u64 prev_len = 0;
344         u64 next_start = 0;
345         u64 next_len = 0;
346         struct rb_node *node;
347         int have_prev = 0;
348         int have_next = 0;
349         int ret = 0;
350
351         tmp.start = start;
352         tmp.len = len;
353         node = rb_search(holes, &tmp, compare_hole_range, NULL);
354         if (!node)
355                 return -EEXIST;
356         hole = rb_entry(node, struct file_extent_hole, node);
357         if (start + len > hole->start + hole->len)
358                 return -EEXIST;
359
360         /*
361          * Now there will be no overflap, delete the hole and re-add the
362          * split(s) if they exists.
363          */
364         if (start > hole->start) {
365                 prev_start = hole->start;
366                 prev_len = start - hole->start;
367                 have_prev = 1;
368         }
369         if (hole->start + hole->len > start + len) {
370                 next_start = start + len;
371                 next_len = hole->start + hole->len - start - len;
372                 have_next = 1;
373         }
374         rb_erase(node, holes);
375         free(hole);
376         if (have_prev) {
377                 ret = add_file_extent_hole(holes, prev_start, prev_len);
378                 if (ret < 0)
379                         return ret;
380         }
381         if (have_next) {
382                 ret = add_file_extent_hole(holes, next_start, next_len);
383                 if (ret < 0)
384                         return ret;
385         }
386         return 0;
387 }
388
389 static int copy_file_extent_holes(struct rb_root *dst,
390                                   struct rb_root *src)
391 {
392         struct file_extent_hole *hole;
393         struct rb_node *node;
394         int ret = 0;
395
396         node = rb_first(src);
397         while (node) {
398                 hole = rb_entry(node, struct file_extent_hole, node);
399                 ret = add_file_extent_hole(dst, hole->start, hole->len);
400                 if (ret)
401                         break;
402                 node = rb_next(node);
403         }
404         return ret;
405 }
406
407 static void free_file_extent_holes(struct rb_root *holes)
408 {
409         struct rb_node *node;
410         struct file_extent_hole *hole;
411
412         node = rb_first(holes);
413         while (node) {
414                 hole = rb_entry(node, struct file_extent_hole, node);
415                 rb_erase(node, holes);
416                 free(hole);
417                 node = rb_first(holes);
418         }
419 }
420
421 struct inode_record {
422         struct list_head backrefs;
423         unsigned int checked:1;
424         unsigned int merging:1;
425         unsigned int found_inode_item:1;
426         unsigned int found_dir_item:1;
427         unsigned int found_file_extent:1;
428         unsigned int found_csum_item:1;
429         unsigned int some_csum_missing:1;
430         unsigned int nodatasum:1;
431         int errors;
432
433         u64 ino;
434         u32 nlink;
435         u32 imode;
436         u64 isize;
437         u64 nbytes;
438
439         u32 found_link;
440         u64 found_size;
441         u64 extent_start;
442         u64 extent_end;
443         struct rb_root holes;
444         struct list_head orphan_extents;
445
446         u32 refs;
447 };
448
449 #define I_ERR_NO_INODE_ITEM             (1 << 0)
450 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
451 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
452 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
453 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
454 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
455 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
456 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
457 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) // 100
458 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
459 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) // 400
460 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
461 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
462 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
463 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
464
465 struct root_backref {
466         struct list_head list;
467         unsigned int found_dir_item:1;
468         unsigned int found_dir_index:1;
469         unsigned int found_back_ref:1;
470         unsigned int found_forward_ref:1;
471         unsigned int reachable:1;
472         int errors;
473         u64 ref_root;
474         u64 dir;
475         u64 index;
476         u16 namelen;
477         char name[0];
478 };
479
480 struct root_record {
481         struct list_head backrefs;
482         struct cache_extent cache;
483         unsigned int found_root_item:1;
484         u64 objectid;
485         u32 found_ref;
486 };
487
488 struct ptr_node {
489         struct cache_extent cache;
490         void *data;
491 };
492
493 struct shared_node {
494         struct cache_extent cache;
495         struct cache_tree root_cache;
496         struct cache_tree inode_cache;
497         struct inode_record *current;
498         u32 refs;
499 };
500
501 struct block_info {
502         u64 start;
503         u32 size;
504 };
505
506 struct walk_control {
507         struct cache_tree shared;
508         struct shared_node *nodes[BTRFS_MAX_LEVEL];
509         int active_node;
510         int root_level;
511 };
512
513 struct bad_item {
514         struct btrfs_key key;
515         u64 root_id;
516         struct list_head list;
517 };
518
519 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
520
521 static void record_root_in_trans(struct btrfs_trans_handle *trans,
522                                  struct btrfs_root *root)
523 {
524         if (root->last_trans != trans->transid) {
525                 root->track_dirty = 1;
526                 root->last_trans = trans->transid;
527                 root->commit_root = root->node;
528                 extent_buffer_get(root->node);
529         }
530 }
531
532 static u8 imode_to_type(u32 imode)
533 {
534 #define S_SHIFT 12
535         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
536                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
537                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
538                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
539                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
540                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
541                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
542                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
543         };
544
545         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
546 #undef S_SHIFT
547 }
548
549 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
550 {
551         struct device_record *rec1;
552         struct device_record *rec2;
553
554         rec1 = rb_entry(node1, struct device_record, node);
555         rec2 = rb_entry(node2, struct device_record, node);
556         if (rec1->devid > rec2->devid)
557                 return -1;
558         else if (rec1->devid < rec2->devid)
559                 return 1;
560         else
561                 return 0;
562 }
563
564 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
565 {
566         struct inode_record *rec;
567         struct inode_backref *backref;
568         struct inode_backref *orig;
569         struct inode_backref *tmp;
570         struct orphan_data_extent *src_orphan;
571         struct orphan_data_extent *dst_orphan;
572         size_t size;
573         int ret;
574
575         rec = malloc(sizeof(*rec));
576         if (!rec)
577                 return ERR_PTR(-ENOMEM);
578         memcpy(rec, orig_rec, sizeof(*rec));
579         rec->refs = 1;
580         INIT_LIST_HEAD(&rec->backrefs);
581         INIT_LIST_HEAD(&rec->orphan_extents);
582         rec->holes = RB_ROOT;
583
584         list_for_each_entry(orig, &orig_rec->backrefs, list) {
585                 size = sizeof(*orig) + orig->namelen + 1;
586                 backref = malloc(size);
587                 if (!backref) {
588                         ret = -ENOMEM;
589                         goto cleanup;
590                 }
591                 memcpy(backref, orig, size);
592                 list_add_tail(&backref->list, &rec->backrefs);
593         }
594         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
595                 dst_orphan = malloc(sizeof(*dst_orphan));
596                 if (!dst_orphan) {
597                         ret = -ENOMEM;
598                         goto cleanup;
599                 }
600                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
601                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
602         }
603         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
604         BUG_ON(ret < 0);
605
606         return rec;
607
608 cleanup:
609         if (!list_empty(&rec->backrefs))
610                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
611                         list_del(&orig->list);
612                         free(orig);
613                 }
614
615         if (!list_empty(&rec->orphan_extents))
616                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
617                         list_del(&orig->list);
618                         free(orig);
619                 }
620
621         free(rec);
622
623         return ERR_PTR(ret);
624 }
625
626 static void print_orphan_data_extents(struct list_head *orphan_extents,
627                                       u64 objectid)
628 {
629         struct orphan_data_extent *orphan;
630
631         if (list_empty(orphan_extents))
632                 return;
633         printf("The following data extent is lost in tree %llu:\n",
634                objectid);
635         list_for_each_entry(orphan, orphan_extents, list) {
636                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
637                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
638                        orphan->disk_len);
639         }
640 }
641
642 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
643 {
644         u64 root_objectid = root->root_key.objectid;
645         int errors = rec->errors;
646
647         if (!errors)
648                 return;
649         /* reloc root errors, we print its corresponding fs root objectid*/
650         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
651                 root_objectid = root->root_key.offset;
652                 fprintf(stderr, "reloc");
653         }
654         fprintf(stderr, "root %llu inode %llu errors %x",
655                 (unsigned long long) root_objectid,
656                 (unsigned long long) rec->ino, rec->errors);
657
658         if (errors & I_ERR_NO_INODE_ITEM)
659                 fprintf(stderr, ", no inode item");
660         if (errors & I_ERR_NO_ORPHAN_ITEM)
661                 fprintf(stderr, ", no orphan item");
662         if (errors & I_ERR_DUP_INODE_ITEM)
663                 fprintf(stderr, ", dup inode item");
664         if (errors & I_ERR_DUP_DIR_INDEX)
665                 fprintf(stderr, ", dup dir index");
666         if (errors & I_ERR_ODD_DIR_ITEM)
667                 fprintf(stderr, ", odd dir item");
668         if (errors & I_ERR_ODD_FILE_EXTENT)
669                 fprintf(stderr, ", odd file extent");
670         if (errors & I_ERR_BAD_FILE_EXTENT)
671                 fprintf(stderr, ", bad file extent");
672         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
673                 fprintf(stderr, ", file extent overlap");
674         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
675                 fprintf(stderr, ", file extent discount");
676         if (errors & I_ERR_DIR_ISIZE_WRONG)
677                 fprintf(stderr, ", dir isize wrong");
678         if (errors & I_ERR_FILE_NBYTES_WRONG)
679                 fprintf(stderr, ", nbytes wrong");
680         if (errors & I_ERR_ODD_CSUM_ITEM)
681                 fprintf(stderr, ", odd csum item");
682         if (errors & I_ERR_SOME_CSUM_MISSING)
683                 fprintf(stderr, ", some csum missing");
684         if (errors & I_ERR_LINK_COUNT_WRONG)
685                 fprintf(stderr, ", link count wrong");
686         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
687                 fprintf(stderr, ", orphan file extent");
688         fprintf(stderr, "\n");
689         /* Print the orphan extents if needed */
690         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
691                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
692
693         /* Print the holes if needed */
694         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
695                 struct file_extent_hole *hole;
696                 struct rb_node *node;
697                 int found = 0;
698
699                 node = rb_first(&rec->holes);
700                 fprintf(stderr, "Found file extent holes:\n");
701                 while (node) {
702                         found = 1;
703                         hole = rb_entry(node, struct file_extent_hole, node);
704                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
705                                 hole->start, hole->len);
706                         node = rb_next(node);
707                 }
708                 if (!found)
709                         fprintf(stderr, "\tstart: 0, len: %llu\n",
710                                 round_up(rec->isize, root->sectorsize));
711         }
712 }
713
714 static void print_ref_error(int errors)
715 {
716         if (errors & REF_ERR_NO_DIR_ITEM)
717                 fprintf(stderr, ", no dir item");
718         if (errors & REF_ERR_NO_DIR_INDEX)
719                 fprintf(stderr, ", no dir index");
720         if (errors & REF_ERR_NO_INODE_REF)
721                 fprintf(stderr, ", no inode ref");
722         if (errors & REF_ERR_DUP_DIR_ITEM)
723                 fprintf(stderr, ", dup dir item");
724         if (errors & REF_ERR_DUP_DIR_INDEX)
725                 fprintf(stderr, ", dup dir index");
726         if (errors & REF_ERR_DUP_INODE_REF)
727                 fprintf(stderr, ", dup inode ref");
728         if (errors & REF_ERR_INDEX_UNMATCH)
729                 fprintf(stderr, ", index unmatch");
730         if (errors & REF_ERR_FILETYPE_UNMATCH)
731                 fprintf(stderr, ", filetype unmatch");
732         if (errors & REF_ERR_NAME_TOO_LONG)
733                 fprintf(stderr, ", name too long");
734         if (errors & REF_ERR_NO_ROOT_REF)
735                 fprintf(stderr, ", no root ref");
736         if (errors & REF_ERR_NO_ROOT_BACKREF)
737                 fprintf(stderr, ", no root backref");
738         if (errors & REF_ERR_DUP_ROOT_REF)
739                 fprintf(stderr, ", dup root ref");
740         if (errors & REF_ERR_DUP_ROOT_BACKREF)
741                 fprintf(stderr, ", dup root backref");
742         fprintf(stderr, "\n");
743 }
744
745 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
746                                           u64 ino, int mod)
747 {
748         struct ptr_node *node;
749         struct cache_extent *cache;
750         struct inode_record *rec = NULL;
751         int ret;
752
753         cache = lookup_cache_extent(inode_cache, ino, 1);
754         if (cache) {
755                 node = container_of(cache, struct ptr_node, cache);
756                 rec = node->data;
757                 if (mod && rec->refs > 1) {
758                         node->data = clone_inode_rec(rec);
759                         if (IS_ERR(node->data))
760                                 return node->data;
761                         rec->refs--;
762                         rec = node->data;
763                 }
764         } else if (mod) {
765                 rec = calloc(1, sizeof(*rec));
766                 if (!rec)
767                         return ERR_PTR(-ENOMEM);
768                 rec->ino = ino;
769                 rec->extent_start = (u64)-1;
770                 rec->refs = 1;
771                 INIT_LIST_HEAD(&rec->backrefs);
772                 INIT_LIST_HEAD(&rec->orphan_extents);
773                 rec->holes = RB_ROOT;
774
775                 node = malloc(sizeof(*node));
776                 if (!node) {
777                         free(rec);
778                         return ERR_PTR(-ENOMEM);
779                 }
780                 node->cache.start = ino;
781                 node->cache.size = 1;
782                 node->data = rec;
783
784                 if (ino == BTRFS_FREE_INO_OBJECTID)
785                         rec->found_link = 1;
786
787                 ret = insert_cache_extent(inode_cache, &node->cache);
788                 if (ret)
789                         return ERR_PTR(-EEXIST);
790         }
791         return rec;
792 }
793
794 static void free_orphan_data_extents(struct list_head *orphan_extents)
795 {
796         struct orphan_data_extent *orphan;
797
798         while (!list_empty(orphan_extents)) {
799                 orphan = list_entry(orphan_extents->next,
800                                     struct orphan_data_extent, list);
801                 list_del(&orphan->list);
802                 free(orphan);
803         }
804 }
805
806 static void free_inode_rec(struct inode_record *rec)
807 {
808         struct inode_backref *backref;
809
810         if (--rec->refs > 0)
811                 return;
812
813         while (!list_empty(&rec->backrefs)) {
814                 backref = list_entry(rec->backrefs.next,
815                                      struct inode_backref, list);
816                 list_del(&backref->list);
817                 free(backref);
818         }
819         free_orphan_data_extents(&rec->orphan_extents);
820         free_file_extent_holes(&rec->holes);
821         free(rec);
822 }
823
824 static int can_free_inode_rec(struct inode_record *rec)
825 {
826         if (!rec->errors && rec->checked && rec->found_inode_item &&
827             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
828                 return 1;
829         return 0;
830 }
831
832 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
833                                  struct inode_record *rec)
834 {
835         struct cache_extent *cache;
836         struct inode_backref *tmp, *backref;
837         struct ptr_node *node;
838         unsigned char filetype;
839
840         if (!rec->found_inode_item)
841                 return;
842
843         filetype = imode_to_type(rec->imode);
844         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
845                 if (backref->found_dir_item && backref->found_dir_index) {
846                         if (backref->filetype != filetype)
847                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
848                         if (!backref->errors && backref->found_inode_ref &&
849                             rec->nlink == rec->found_link) {
850                                 list_del(&backref->list);
851                                 free(backref);
852                         }
853                 }
854         }
855
856         if (!rec->checked || rec->merging)
857                 return;
858
859         if (S_ISDIR(rec->imode)) {
860                 if (rec->found_size != rec->isize)
861                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
862                 if (rec->found_file_extent)
863                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
864         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
865                 if (rec->found_dir_item)
866                         rec->errors |= I_ERR_ODD_DIR_ITEM;
867                 if (rec->found_size != rec->nbytes)
868                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
869                 if (rec->nlink > 0 && !no_holes &&
870                     (rec->extent_end < rec->isize ||
871                      first_extent_gap(&rec->holes) < rec->isize))
872                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
873         }
874
875         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
876                 if (rec->found_csum_item && rec->nodatasum)
877                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
878                 if (rec->some_csum_missing && !rec->nodatasum)
879                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
880         }
881
882         BUG_ON(rec->refs != 1);
883         if (can_free_inode_rec(rec)) {
884                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
885                 node = container_of(cache, struct ptr_node, cache);
886                 BUG_ON(node->data != rec);
887                 remove_cache_extent(inode_cache, &node->cache);
888                 free(node);
889                 free_inode_rec(rec);
890         }
891 }
892
893 static int check_orphan_item(struct btrfs_root *root, u64 ino)
894 {
895         struct btrfs_path path;
896         struct btrfs_key key;
897         int ret;
898
899         key.objectid = BTRFS_ORPHAN_OBJECTID;
900         key.type = BTRFS_ORPHAN_ITEM_KEY;
901         key.offset = ino;
902
903         btrfs_init_path(&path);
904         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
905         btrfs_release_path(&path);
906         if (ret > 0)
907                 ret = -ENOENT;
908         return ret;
909 }
910
911 static int process_inode_item(struct extent_buffer *eb,
912                               int slot, struct btrfs_key *key,
913                               struct shared_node *active_node)
914 {
915         struct inode_record *rec;
916         struct btrfs_inode_item *item;
917
918         rec = active_node->current;
919         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
920         if (rec->found_inode_item) {
921                 rec->errors |= I_ERR_DUP_INODE_ITEM;
922                 return 1;
923         }
924         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
925         rec->nlink = btrfs_inode_nlink(eb, item);
926         rec->isize = btrfs_inode_size(eb, item);
927         rec->nbytes = btrfs_inode_nbytes(eb, item);
928         rec->imode = btrfs_inode_mode(eb, item);
929         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
930                 rec->nodatasum = 1;
931         rec->found_inode_item = 1;
932         if (rec->nlink == 0)
933                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
934         maybe_free_inode_rec(&active_node->inode_cache, rec);
935         return 0;
936 }
937
938 static struct inode_backref *get_inode_backref(struct inode_record *rec,
939                                                 const char *name,
940                                                 int namelen, u64 dir)
941 {
942         struct inode_backref *backref;
943
944         list_for_each_entry(backref, &rec->backrefs, list) {
945                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
946                         break;
947                 if (backref->dir != dir || backref->namelen != namelen)
948                         continue;
949                 if (memcmp(name, backref->name, namelen))
950                         continue;
951                 return backref;
952         }
953
954         backref = malloc(sizeof(*backref) + namelen + 1);
955         if (!backref)
956                 return NULL;
957         memset(backref, 0, sizeof(*backref));
958         backref->dir = dir;
959         backref->namelen = namelen;
960         memcpy(backref->name, name, namelen);
961         backref->name[namelen] = '\0';
962         list_add_tail(&backref->list, &rec->backrefs);
963         return backref;
964 }
965
966 static int add_inode_backref(struct cache_tree *inode_cache,
967                              u64 ino, u64 dir, u64 index,
968                              const char *name, int namelen,
969                              int filetype, int itemtype, int errors)
970 {
971         struct inode_record *rec;
972         struct inode_backref *backref;
973
974         rec = get_inode_rec(inode_cache, ino, 1);
975         BUG_ON(IS_ERR(rec));
976         backref = get_inode_backref(rec, name, namelen, dir);
977         BUG_ON(!backref);
978         if (errors)
979                 backref->errors |= errors;
980         if (itemtype == BTRFS_DIR_INDEX_KEY) {
981                 if (backref->found_dir_index)
982                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
983                 if (backref->found_inode_ref && backref->index != index)
984                         backref->errors |= REF_ERR_INDEX_UNMATCH;
985                 if (backref->found_dir_item && backref->filetype != filetype)
986                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
987
988                 backref->index = index;
989                 backref->filetype = filetype;
990                 backref->found_dir_index = 1;
991         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
992                 rec->found_link++;
993                 if (backref->found_dir_item)
994                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
995                 if (backref->found_dir_index && backref->filetype != filetype)
996                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
997
998                 backref->filetype = filetype;
999                 backref->found_dir_item = 1;
1000         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1001                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1002                 if (backref->found_inode_ref)
1003                         backref->errors |= REF_ERR_DUP_INODE_REF;
1004                 if (backref->found_dir_index && backref->index != index)
1005                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1006                 else
1007                         backref->index = index;
1008
1009                 backref->ref_type = itemtype;
1010                 backref->found_inode_ref = 1;
1011         } else {
1012                 BUG_ON(1);
1013         }
1014
1015         maybe_free_inode_rec(inode_cache, rec);
1016         return 0;
1017 }
1018
1019 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1020                             struct cache_tree *dst_cache)
1021 {
1022         struct inode_backref *backref;
1023         u32 dir_count = 0;
1024         int ret = 0;
1025
1026         dst->merging = 1;
1027         list_for_each_entry(backref, &src->backrefs, list) {
1028                 if (backref->found_dir_index) {
1029                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1030                                         backref->index, backref->name,
1031                                         backref->namelen, backref->filetype,
1032                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1033                 }
1034                 if (backref->found_dir_item) {
1035                         dir_count++;
1036                         add_inode_backref(dst_cache, dst->ino,
1037                                         backref->dir, 0, backref->name,
1038                                         backref->namelen, backref->filetype,
1039                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1040                 }
1041                 if (backref->found_inode_ref) {
1042                         add_inode_backref(dst_cache, dst->ino,
1043                                         backref->dir, backref->index,
1044                                         backref->name, backref->namelen, 0,
1045                                         backref->ref_type, backref->errors);
1046                 }
1047         }
1048
1049         if (src->found_dir_item)
1050                 dst->found_dir_item = 1;
1051         if (src->found_file_extent)
1052                 dst->found_file_extent = 1;
1053         if (src->found_csum_item)
1054                 dst->found_csum_item = 1;
1055         if (src->some_csum_missing)
1056                 dst->some_csum_missing = 1;
1057         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1058                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1059                 if (ret < 0)
1060                         return ret;
1061         }
1062
1063         BUG_ON(src->found_link < dir_count);
1064         dst->found_link += src->found_link - dir_count;
1065         dst->found_size += src->found_size;
1066         if (src->extent_start != (u64)-1) {
1067                 if (dst->extent_start == (u64)-1) {
1068                         dst->extent_start = src->extent_start;
1069                         dst->extent_end = src->extent_end;
1070                 } else {
1071                         if (dst->extent_end > src->extent_start)
1072                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1073                         else if (dst->extent_end < src->extent_start) {
1074                                 ret = add_file_extent_hole(&dst->holes,
1075                                         dst->extent_end,
1076                                         src->extent_start - dst->extent_end);
1077                         }
1078                         if (dst->extent_end < src->extent_end)
1079                                 dst->extent_end = src->extent_end;
1080                 }
1081         }
1082
1083         dst->errors |= src->errors;
1084         if (src->found_inode_item) {
1085                 if (!dst->found_inode_item) {
1086                         dst->nlink = src->nlink;
1087                         dst->isize = src->isize;
1088                         dst->nbytes = src->nbytes;
1089                         dst->imode = src->imode;
1090                         dst->nodatasum = src->nodatasum;
1091                         dst->found_inode_item = 1;
1092                 } else {
1093                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1094                 }
1095         }
1096         dst->merging = 0;
1097
1098         return 0;
1099 }
1100
1101 static int splice_shared_node(struct shared_node *src_node,
1102                               struct shared_node *dst_node)
1103 {
1104         struct cache_extent *cache;
1105         struct ptr_node *node, *ins;
1106         struct cache_tree *src, *dst;
1107         struct inode_record *rec, *conflict;
1108         u64 current_ino = 0;
1109         int splice = 0;
1110         int ret;
1111
1112         if (--src_node->refs == 0)
1113                 splice = 1;
1114         if (src_node->current)
1115                 current_ino = src_node->current->ino;
1116
1117         src = &src_node->root_cache;
1118         dst = &dst_node->root_cache;
1119 again:
1120         cache = search_cache_extent(src, 0);
1121         while (cache) {
1122                 node = container_of(cache, struct ptr_node, cache);
1123                 rec = node->data;
1124                 cache = next_cache_extent(cache);
1125
1126                 if (splice) {
1127                         remove_cache_extent(src, &node->cache);
1128                         ins = node;
1129                 } else {
1130                         ins = malloc(sizeof(*ins));
1131                         BUG_ON(!ins);
1132                         ins->cache.start = node->cache.start;
1133                         ins->cache.size = node->cache.size;
1134                         ins->data = rec;
1135                         rec->refs++;
1136                 }
1137                 ret = insert_cache_extent(dst, &ins->cache);
1138                 if (ret == -EEXIST) {
1139                         conflict = get_inode_rec(dst, rec->ino, 1);
1140                         BUG_ON(IS_ERR(conflict));
1141                         merge_inode_recs(rec, conflict, dst);
1142                         if (rec->checked) {
1143                                 conflict->checked = 1;
1144                                 if (dst_node->current == conflict)
1145                                         dst_node->current = NULL;
1146                         }
1147                         maybe_free_inode_rec(dst, conflict);
1148                         free_inode_rec(rec);
1149                         free(ins);
1150                 } else {
1151                         BUG_ON(ret);
1152                 }
1153         }
1154
1155         if (src == &src_node->root_cache) {
1156                 src = &src_node->inode_cache;
1157                 dst = &dst_node->inode_cache;
1158                 goto again;
1159         }
1160
1161         if (current_ino > 0 && (!dst_node->current ||
1162             current_ino > dst_node->current->ino)) {
1163                 if (dst_node->current) {
1164                         dst_node->current->checked = 1;
1165                         maybe_free_inode_rec(dst, dst_node->current);
1166                 }
1167                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1168                 BUG_ON(IS_ERR(dst_node->current));
1169         }
1170         return 0;
1171 }
1172
1173 static void free_inode_ptr(struct cache_extent *cache)
1174 {
1175         struct ptr_node *node;
1176         struct inode_record *rec;
1177
1178         node = container_of(cache, struct ptr_node, cache);
1179         rec = node->data;
1180         free_inode_rec(rec);
1181         free(node);
1182 }
1183
1184 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1185
1186 static struct shared_node *find_shared_node(struct cache_tree *shared,
1187                                             u64 bytenr)
1188 {
1189         struct cache_extent *cache;
1190         struct shared_node *node;
1191
1192         cache = lookup_cache_extent(shared, bytenr, 1);
1193         if (cache) {
1194                 node = container_of(cache, struct shared_node, cache);
1195                 return node;
1196         }
1197         return NULL;
1198 }
1199
1200 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1201 {
1202         int ret;
1203         struct shared_node *node;
1204
1205         node = calloc(1, sizeof(*node));
1206         if (!node)
1207                 return -ENOMEM;
1208         node->cache.start = bytenr;
1209         node->cache.size = 1;
1210         cache_tree_init(&node->root_cache);
1211         cache_tree_init(&node->inode_cache);
1212         node->refs = refs;
1213
1214         ret = insert_cache_extent(shared, &node->cache);
1215
1216         return ret;
1217 }
1218
1219 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1220                              struct walk_control *wc, int level)
1221 {
1222         struct shared_node *node;
1223         struct shared_node *dest;
1224         int ret;
1225
1226         if (level == wc->active_node)
1227                 return 0;
1228
1229         BUG_ON(wc->active_node <= level);
1230         node = find_shared_node(&wc->shared, bytenr);
1231         if (!node) {
1232                 ret = add_shared_node(&wc->shared, bytenr, refs);
1233                 BUG_ON(ret);
1234                 node = find_shared_node(&wc->shared, bytenr);
1235                 wc->nodes[level] = node;
1236                 wc->active_node = level;
1237                 return 0;
1238         }
1239
1240         if (wc->root_level == wc->active_node &&
1241             btrfs_root_refs(&root->root_item) == 0) {
1242                 if (--node->refs == 0) {
1243                         free_inode_recs_tree(&node->root_cache);
1244                         free_inode_recs_tree(&node->inode_cache);
1245                         remove_cache_extent(&wc->shared, &node->cache);
1246                         free(node);
1247                 }
1248                 return 1;
1249         }
1250
1251         dest = wc->nodes[wc->active_node];
1252         splice_shared_node(node, dest);
1253         if (node->refs == 0) {
1254                 remove_cache_extent(&wc->shared, &node->cache);
1255                 free(node);
1256         }
1257         return 1;
1258 }
1259
1260 static int leave_shared_node(struct btrfs_root *root,
1261                              struct walk_control *wc, int level)
1262 {
1263         struct shared_node *node;
1264         struct shared_node *dest;
1265         int i;
1266
1267         if (level == wc->root_level)
1268                 return 0;
1269
1270         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1271                 if (wc->nodes[i])
1272                         break;
1273         }
1274         BUG_ON(i >= BTRFS_MAX_LEVEL);
1275
1276         node = wc->nodes[wc->active_node];
1277         wc->nodes[wc->active_node] = NULL;
1278         wc->active_node = i;
1279
1280         dest = wc->nodes[wc->active_node];
1281         if (wc->active_node < wc->root_level ||
1282             btrfs_root_refs(&root->root_item) > 0) {
1283                 BUG_ON(node->refs <= 1);
1284                 splice_shared_node(node, dest);
1285         } else {
1286                 BUG_ON(node->refs < 2);
1287                 node->refs--;
1288         }
1289         return 0;
1290 }
1291
1292 /*
1293  * Returns:
1294  * < 0 - on error
1295  * 1   - if the root with id child_root_id is a child of root parent_root_id
1296  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1297  *       has other root(s) as parent(s)
1298  * 2   - if the root child_root_id doesn't have any parent roots
1299  */
1300 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1301                          u64 child_root_id)
1302 {
1303         struct btrfs_path path;
1304         struct btrfs_key key;
1305         struct extent_buffer *leaf;
1306         int has_parent = 0;
1307         int ret;
1308
1309         btrfs_init_path(&path);
1310
1311         key.objectid = parent_root_id;
1312         key.type = BTRFS_ROOT_REF_KEY;
1313         key.offset = child_root_id;
1314         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1315                                 0, 0);
1316         if (ret < 0)
1317                 return ret;
1318         btrfs_release_path(&path);
1319         if (!ret)
1320                 return 1;
1321
1322         key.objectid = child_root_id;
1323         key.type = BTRFS_ROOT_BACKREF_KEY;
1324         key.offset = 0;
1325         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1326                                 0, 0);
1327         if (ret < 0)
1328                 goto out;
1329
1330         while (1) {
1331                 leaf = path.nodes[0];
1332                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1333                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1334                         if (ret)
1335                                 break;
1336                         leaf = path.nodes[0];
1337                 }
1338
1339                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1340                 if (key.objectid != child_root_id ||
1341                     key.type != BTRFS_ROOT_BACKREF_KEY)
1342                         break;
1343
1344                 has_parent = 1;
1345
1346                 if (key.offset == parent_root_id) {
1347                         btrfs_release_path(&path);
1348                         return 1;
1349                 }
1350
1351                 path.slots[0]++;
1352         }
1353 out:
1354         btrfs_release_path(&path);
1355         if (ret < 0)
1356                 return ret;
1357         return has_parent ? 0 : 2;
1358 }
1359
1360 static int process_dir_item(struct btrfs_root *root,
1361                             struct extent_buffer *eb,
1362                             int slot, struct btrfs_key *key,
1363                             struct shared_node *active_node)
1364 {
1365         u32 total;
1366         u32 cur = 0;
1367         u32 len;
1368         u32 name_len;
1369         u32 data_len;
1370         int error;
1371         int nritems = 0;
1372         int filetype;
1373         struct btrfs_dir_item *di;
1374         struct inode_record *rec;
1375         struct cache_tree *root_cache;
1376         struct cache_tree *inode_cache;
1377         struct btrfs_key location;
1378         char namebuf[BTRFS_NAME_LEN];
1379
1380         root_cache = &active_node->root_cache;
1381         inode_cache = &active_node->inode_cache;
1382         rec = active_node->current;
1383         rec->found_dir_item = 1;
1384
1385         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1386         total = btrfs_item_size_nr(eb, slot);
1387         while (cur < total) {
1388                 nritems++;
1389                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1390                 name_len = btrfs_dir_name_len(eb, di);
1391                 data_len = btrfs_dir_data_len(eb, di);
1392                 filetype = btrfs_dir_type(eb, di);
1393
1394                 rec->found_size += name_len;
1395                 if (name_len <= BTRFS_NAME_LEN) {
1396                         len = name_len;
1397                         error = 0;
1398                 } else {
1399                         len = BTRFS_NAME_LEN;
1400                         error = REF_ERR_NAME_TOO_LONG;
1401                 }
1402                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1403
1404                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1405                         add_inode_backref(inode_cache, location.objectid,
1406                                           key->objectid, key->offset, namebuf,
1407                                           len, filetype, key->type, error);
1408                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1409                         add_inode_backref(root_cache, location.objectid,
1410                                           key->objectid, key->offset,
1411                                           namebuf, len, filetype,
1412                                           key->type, error);
1413                 } else {
1414                         fprintf(stderr, "invalid location in dir item %u\n",
1415                                 location.type);
1416                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1417                                           key->objectid, key->offset, namebuf,
1418                                           len, filetype, key->type, error);
1419                 }
1420
1421                 len = sizeof(*di) + name_len + data_len;
1422                 di = (struct btrfs_dir_item *)((char *)di + len);
1423                 cur += len;
1424         }
1425         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1426                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1427
1428         return 0;
1429 }
1430
1431 static int process_inode_ref(struct extent_buffer *eb,
1432                              int slot, struct btrfs_key *key,
1433                              struct shared_node *active_node)
1434 {
1435         u32 total;
1436         u32 cur = 0;
1437         u32 len;
1438         u32 name_len;
1439         u64 index;
1440         int error;
1441         struct cache_tree *inode_cache;
1442         struct btrfs_inode_ref *ref;
1443         char namebuf[BTRFS_NAME_LEN];
1444
1445         inode_cache = &active_node->inode_cache;
1446
1447         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1448         total = btrfs_item_size_nr(eb, slot);
1449         while (cur < total) {
1450                 name_len = btrfs_inode_ref_name_len(eb, ref);
1451                 index = btrfs_inode_ref_index(eb, ref);
1452                 if (name_len <= BTRFS_NAME_LEN) {
1453                         len = name_len;
1454                         error = 0;
1455                 } else {
1456                         len = BTRFS_NAME_LEN;
1457                         error = REF_ERR_NAME_TOO_LONG;
1458                 }
1459                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1460                 add_inode_backref(inode_cache, key->objectid, key->offset,
1461                                   index, namebuf, len, 0, key->type, error);
1462
1463                 len = sizeof(*ref) + name_len;
1464                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1465                 cur += len;
1466         }
1467         return 0;
1468 }
1469
1470 static int process_inode_extref(struct extent_buffer *eb,
1471                                 int slot, struct btrfs_key *key,
1472                                 struct shared_node *active_node)
1473 {
1474         u32 total;
1475         u32 cur = 0;
1476         u32 len;
1477         u32 name_len;
1478         u64 index;
1479         u64 parent;
1480         int error;
1481         struct cache_tree *inode_cache;
1482         struct btrfs_inode_extref *extref;
1483         char namebuf[BTRFS_NAME_LEN];
1484
1485         inode_cache = &active_node->inode_cache;
1486
1487         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1488         total = btrfs_item_size_nr(eb, slot);
1489         while (cur < total) {
1490                 name_len = btrfs_inode_extref_name_len(eb, extref);
1491                 index = btrfs_inode_extref_index(eb, extref);
1492                 parent = btrfs_inode_extref_parent(eb, extref);
1493                 if (name_len <= BTRFS_NAME_LEN) {
1494                         len = name_len;
1495                         error = 0;
1496                 } else {
1497                         len = BTRFS_NAME_LEN;
1498                         error = REF_ERR_NAME_TOO_LONG;
1499                 }
1500                 read_extent_buffer(eb, namebuf,
1501                                    (unsigned long)(extref + 1), len);
1502                 add_inode_backref(inode_cache, key->objectid, parent,
1503                                   index, namebuf, len, 0, key->type, error);
1504
1505                 len = sizeof(*extref) + name_len;
1506                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1507                 cur += len;
1508         }
1509         return 0;
1510
1511 }
1512
1513 static int count_csum_range(struct btrfs_root *root, u64 start,
1514                             u64 len, u64 *found)
1515 {
1516         struct btrfs_key key;
1517         struct btrfs_path path;
1518         struct extent_buffer *leaf;
1519         int ret;
1520         size_t size;
1521         *found = 0;
1522         u64 csum_end;
1523         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1524
1525         btrfs_init_path(&path);
1526
1527         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1528         key.offset = start;
1529         key.type = BTRFS_EXTENT_CSUM_KEY;
1530
1531         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1532                                 &key, &path, 0, 0);
1533         if (ret < 0)
1534                 goto out;
1535         if (ret > 0 && path.slots[0] > 0) {
1536                 leaf = path.nodes[0];
1537                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1538                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1539                     key.type == BTRFS_EXTENT_CSUM_KEY)
1540                         path.slots[0]--;
1541         }
1542
1543         while (len > 0) {
1544                 leaf = path.nodes[0];
1545                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1546                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1547                         if (ret > 0)
1548                                 break;
1549                         else if (ret < 0)
1550                                 goto out;
1551                         leaf = path.nodes[0];
1552                 }
1553
1554                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1555                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1556                     key.type != BTRFS_EXTENT_CSUM_KEY)
1557                         break;
1558
1559                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1560                 if (key.offset >= start + len)
1561                         break;
1562
1563                 if (key.offset > start)
1564                         start = key.offset;
1565
1566                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1567                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1568                 if (csum_end > start) {
1569                         size = min(csum_end - start, len);
1570                         len -= size;
1571                         start += size;
1572                         *found += size;
1573                 }
1574
1575                 path.slots[0]++;
1576         }
1577 out:
1578         btrfs_release_path(&path);
1579         if (ret < 0)
1580                 return ret;
1581         return 0;
1582 }
1583
1584 static int process_file_extent(struct btrfs_root *root,
1585                                 struct extent_buffer *eb,
1586                                 int slot, struct btrfs_key *key,
1587                                 struct shared_node *active_node)
1588 {
1589         struct inode_record *rec;
1590         struct btrfs_file_extent_item *fi;
1591         u64 num_bytes = 0;
1592         u64 disk_bytenr = 0;
1593         u64 extent_offset = 0;
1594         u64 mask = root->sectorsize - 1;
1595         int extent_type;
1596         int ret;
1597
1598         rec = active_node->current;
1599         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1600         rec->found_file_extent = 1;
1601
1602         if (rec->extent_start == (u64)-1) {
1603                 rec->extent_start = key->offset;
1604                 rec->extent_end = key->offset;
1605         }
1606
1607         if (rec->extent_end > key->offset)
1608                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1609         else if (rec->extent_end < key->offset) {
1610                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1611                                            key->offset - rec->extent_end);
1612                 if (ret < 0)
1613                         return ret;
1614         }
1615
1616         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1617         extent_type = btrfs_file_extent_type(eb, fi);
1618
1619         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1620                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1621                 if (num_bytes == 0)
1622                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1623                 rec->found_size += num_bytes;
1624                 num_bytes = (num_bytes + mask) & ~mask;
1625         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1626                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1627                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1628                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1629                 extent_offset = btrfs_file_extent_offset(eb, fi);
1630                 if (num_bytes == 0 || (num_bytes & mask))
1631                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1632                 if (num_bytes + extent_offset >
1633                     btrfs_file_extent_ram_bytes(eb, fi))
1634                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1635                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1636                     (btrfs_file_extent_compression(eb, fi) ||
1637                      btrfs_file_extent_encryption(eb, fi) ||
1638                      btrfs_file_extent_other_encoding(eb, fi)))
1639                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1640                 if (disk_bytenr > 0)
1641                         rec->found_size += num_bytes;
1642         } else {
1643                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1644         }
1645         rec->extent_end = key->offset + num_bytes;
1646
1647         /*
1648          * The data reloc tree will copy full extents into its inode and then
1649          * copy the corresponding csums.  Because the extent it copied could be
1650          * a preallocated extent that hasn't been written to yet there may be no
1651          * csums to copy, ergo we won't have csums for our file extent.  This is
1652          * ok so just don't bother checking csums if the inode belongs to the
1653          * data reloc tree.
1654          */
1655         if (disk_bytenr > 0 &&
1656             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1657                 u64 found;
1658                 if (btrfs_file_extent_compression(eb, fi))
1659                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1660                 else
1661                         disk_bytenr += extent_offset;
1662
1663                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1664                 if (ret < 0)
1665                         return ret;
1666                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1667                         if (found > 0)
1668                                 rec->found_csum_item = 1;
1669                         if (found < num_bytes)
1670                                 rec->some_csum_missing = 1;
1671                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1672                         if (found > 0)
1673                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1674                 }
1675         }
1676         return 0;
1677 }
1678
1679 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1680                             struct walk_control *wc)
1681 {
1682         struct btrfs_key key;
1683         u32 nritems;
1684         int i;
1685         int ret = 0;
1686         struct cache_tree *inode_cache;
1687         struct shared_node *active_node;
1688
1689         if (wc->root_level == wc->active_node &&
1690             btrfs_root_refs(&root->root_item) == 0)
1691                 return 0;
1692
1693         active_node = wc->nodes[wc->active_node];
1694         inode_cache = &active_node->inode_cache;
1695         nritems = btrfs_header_nritems(eb);
1696         for (i = 0; i < nritems; i++) {
1697                 btrfs_item_key_to_cpu(eb, &key, i);
1698
1699                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1700                         continue;
1701                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1702                         continue;
1703
1704                 if (active_node->current == NULL ||
1705                     active_node->current->ino < key.objectid) {
1706                         if (active_node->current) {
1707                                 active_node->current->checked = 1;
1708                                 maybe_free_inode_rec(inode_cache,
1709                                                      active_node->current);
1710                         }
1711                         active_node->current = get_inode_rec(inode_cache,
1712                                                              key.objectid, 1);
1713                         BUG_ON(IS_ERR(active_node->current));
1714                 }
1715                 switch (key.type) {
1716                 case BTRFS_DIR_ITEM_KEY:
1717                 case BTRFS_DIR_INDEX_KEY:
1718                         ret = process_dir_item(root, eb, i, &key, active_node);
1719                         break;
1720                 case BTRFS_INODE_REF_KEY:
1721                         ret = process_inode_ref(eb, i, &key, active_node);
1722                         break;
1723                 case BTRFS_INODE_EXTREF_KEY:
1724                         ret = process_inode_extref(eb, i, &key, active_node);
1725                         break;
1726                 case BTRFS_INODE_ITEM_KEY:
1727                         ret = process_inode_item(eb, i, &key, active_node);
1728                         break;
1729                 case BTRFS_EXTENT_DATA_KEY:
1730                         ret = process_file_extent(root, eb, i, &key,
1731                                                   active_node);
1732                         break;
1733                 default:
1734                         break;
1735                 };
1736         }
1737         return ret;
1738 }
1739
1740 static void reada_walk_down(struct btrfs_root *root,
1741                             struct extent_buffer *node, int slot)
1742 {
1743         u64 bytenr;
1744         u64 ptr_gen;
1745         u32 nritems;
1746         u32 blocksize;
1747         int i;
1748         int level;
1749
1750         level = btrfs_header_level(node);
1751         if (level != 1)
1752                 return;
1753
1754         nritems = btrfs_header_nritems(node);
1755         blocksize = btrfs_level_size(root, level - 1);
1756         for (i = slot; i < nritems; i++) {
1757                 bytenr = btrfs_node_blockptr(node, i);
1758                 ptr_gen = btrfs_node_ptr_generation(node, i);
1759                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1760         }
1761 }
1762
1763 /*
1764  * Check the child node/leaf by the following condition:
1765  * 1. the first item key of the node/leaf should be the same with the one
1766  *    in parent.
1767  * 2. block in parent node should match the child node/leaf.
1768  * 3. generation of parent node and child's header should be consistent.
1769  *
1770  * Or the child node/leaf pointed by the key in parent is not valid.
1771  *
1772  * We hope to check leaf owner too, but since subvol may share leaves,
1773  * which makes leaf owner check not so strong, key check should be
1774  * sufficient enough for that case.
1775  */
1776 static int check_child_node(struct btrfs_root *root,
1777                             struct extent_buffer *parent, int slot,
1778                             struct extent_buffer *child)
1779 {
1780         struct btrfs_key parent_key;
1781         struct btrfs_key child_key;
1782         int ret = 0;
1783
1784         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1785         if (btrfs_header_level(child) == 0)
1786                 btrfs_item_key_to_cpu(child, &child_key, 0);
1787         else
1788                 btrfs_node_key_to_cpu(child, &child_key, 0);
1789
1790         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1791                 ret = -EINVAL;
1792                 fprintf(stderr,
1793                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1794                         parent_key.objectid, parent_key.type, parent_key.offset,
1795                         child_key.objectid, child_key.type, child_key.offset);
1796         }
1797         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1798                 ret = -EINVAL;
1799                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1800                         btrfs_node_blockptr(parent, slot),
1801                         btrfs_header_bytenr(child));
1802         }
1803         if (btrfs_node_ptr_generation(parent, slot) !=
1804             btrfs_header_generation(child)) {
1805                 ret = -EINVAL;
1806                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1807                         btrfs_header_generation(child),
1808                         btrfs_node_ptr_generation(parent, slot));
1809         }
1810         return ret;
1811 }
1812
1813 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1814                           struct walk_control *wc, int *level)
1815 {
1816         enum btrfs_tree_block_status status;
1817         u64 bytenr;
1818         u64 ptr_gen;
1819         struct extent_buffer *next;
1820         struct extent_buffer *cur;
1821         u32 blocksize;
1822         int ret, err = 0;
1823         u64 refs;
1824
1825         WARN_ON(*level < 0);
1826         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1827         ret = btrfs_lookup_extent_info(NULL, root,
1828                                        path->nodes[*level]->start,
1829                                        *level, 1, &refs, NULL);
1830         if (ret < 0) {
1831                 err = ret;
1832                 goto out;
1833         }
1834
1835         if (refs > 1) {
1836                 ret = enter_shared_node(root, path->nodes[*level]->start,
1837                                         refs, wc, *level);
1838                 if (ret > 0) {
1839                         err = ret;
1840                         goto out;
1841                 }
1842         }
1843
1844         while (*level >= 0) {
1845                 WARN_ON(*level < 0);
1846                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1847                 cur = path->nodes[*level];
1848
1849                 if (btrfs_header_level(cur) != *level)
1850                         WARN_ON(1);
1851
1852                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1853                         break;
1854                 if (*level == 0) {
1855                         ret = process_one_leaf(root, cur, wc);
1856                         if (ret < 0)
1857                                 err = ret;
1858                         break;
1859                 }
1860                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1861                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1862                 blocksize = btrfs_level_size(root, *level - 1);
1863                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1864                                                1, &refs, NULL);
1865                 if (ret < 0)
1866                         refs = 0;
1867
1868                 if (refs > 1) {
1869                         ret = enter_shared_node(root, bytenr, refs,
1870                                                 wc, *level - 1);
1871                         if (ret > 0) {
1872                                 path->slots[*level]++;
1873                                 continue;
1874                         }
1875                 }
1876
1877                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1878                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1879                         free_extent_buffer(next);
1880                         reada_walk_down(root, cur, path->slots[*level]);
1881                         next = read_tree_block(root, bytenr, blocksize,
1882                                                ptr_gen);
1883                         if (!extent_buffer_uptodate(next)) {
1884                                 struct btrfs_key node_key;
1885
1886                                 btrfs_node_key_to_cpu(path->nodes[*level],
1887                                                       &node_key,
1888                                                       path->slots[*level]);
1889                                 btrfs_add_corrupt_extent_record(root->fs_info,
1890                                                 &node_key,
1891                                                 path->nodes[*level]->start,
1892                                                 root->leafsize, *level);
1893                                 err = -EIO;
1894                                 goto out;
1895                         }
1896                 }
1897
1898                 ret = check_child_node(root, cur, path->slots[*level], next);
1899                 if (ret) {
1900                         err = ret;
1901                         goto out;
1902                 }
1903
1904                 if (btrfs_is_leaf(next))
1905                         status = btrfs_check_leaf(root, NULL, next);
1906                 else
1907                         status = btrfs_check_node(root, NULL, next);
1908                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1909                         free_extent_buffer(next);
1910                         err = -EIO;
1911                         goto out;
1912                 }
1913
1914                 *level = *level - 1;
1915                 free_extent_buffer(path->nodes[*level]);
1916                 path->nodes[*level] = next;
1917                 path->slots[*level] = 0;
1918         }
1919 out:
1920         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1921         return err;
1922 }
1923
1924 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1925                         struct walk_control *wc, int *level)
1926 {
1927         int i;
1928         struct extent_buffer *leaf;
1929
1930         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1931                 leaf = path->nodes[i];
1932                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1933                         path->slots[i]++;
1934                         *level = i;
1935                         return 0;
1936                 } else {
1937                         free_extent_buffer(path->nodes[*level]);
1938                         path->nodes[*level] = NULL;
1939                         BUG_ON(*level > wc->active_node);
1940                         if (*level == wc->active_node)
1941                                 leave_shared_node(root, wc, *level);
1942                         *level = i + 1;
1943                 }
1944         }
1945         return 1;
1946 }
1947
1948 static int check_root_dir(struct inode_record *rec)
1949 {
1950         struct inode_backref *backref;
1951         int ret = -1;
1952
1953         if (!rec->found_inode_item || rec->errors)
1954                 goto out;
1955         if (rec->nlink != 1 || rec->found_link != 0)
1956                 goto out;
1957         if (list_empty(&rec->backrefs))
1958                 goto out;
1959         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1960         if (!backref->found_inode_ref)
1961                 goto out;
1962         if (backref->index != 0 || backref->namelen != 2 ||
1963             memcmp(backref->name, "..", 2))
1964                 goto out;
1965         if (backref->found_dir_index || backref->found_dir_item)
1966                 goto out;
1967         ret = 0;
1968 out:
1969         return ret;
1970 }
1971
1972 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1973                               struct btrfs_root *root, struct btrfs_path *path,
1974                               struct inode_record *rec)
1975 {
1976         struct btrfs_inode_item *ei;
1977         struct btrfs_key key;
1978         int ret;
1979
1980         key.objectid = rec->ino;
1981         key.type = BTRFS_INODE_ITEM_KEY;
1982         key.offset = (u64)-1;
1983
1984         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1985         if (ret < 0)
1986                 goto out;
1987         if (ret) {
1988                 if (!path->slots[0]) {
1989                         ret = -ENOENT;
1990                         goto out;
1991                 }
1992                 path->slots[0]--;
1993                 ret = 0;
1994         }
1995         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1996         if (key.objectid != rec->ino) {
1997                 ret = -ENOENT;
1998                 goto out;
1999         }
2000
2001         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2002                             struct btrfs_inode_item);
2003         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2004         btrfs_mark_buffer_dirty(path->nodes[0]);
2005         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2006         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2007                root->root_key.objectid);
2008 out:
2009         btrfs_release_path(path);
2010         return ret;
2011 }
2012
2013 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2014                                     struct btrfs_root *root,
2015                                     struct btrfs_path *path,
2016                                     struct inode_record *rec)
2017 {
2018         int ret;
2019
2020         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2021         btrfs_release_path(path);
2022         if (!ret)
2023                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2024         return ret;
2025 }
2026
2027 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2028                                struct btrfs_root *root,
2029                                struct btrfs_path *path,
2030                                struct inode_record *rec)
2031 {
2032         struct btrfs_inode_item *ei;
2033         struct btrfs_key key;
2034         int ret = 0;
2035
2036         key.objectid = rec->ino;
2037         key.type = BTRFS_INODE_ITEM_KEY;
2038         key.offset = 0;
2039
2040         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2041         if (ret) {
2042                 if (ret > 0)
2043                         ret = -ENOENT;
2044                 goto out;
2045         }
2046
2047         /* Since ret == 0, no need to check anything */
2048         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2049                             struct btrfs_inode_item);
2050         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2051         btrfs_mark_buffer_dirty(path->nodes[0]);
2052         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2053         printf("reset nbytes for ino %llu root %llu\n",
2054                rec->ino, root->root_key.objectid);
2055 out:
2056         btrfs_release_path(path);
2057         return ret;
2058 }
2059
2060 static int add_missing_dir_index(struct btrfs_root *root,
2061                                  struct cache_tree *inode_cache,
2062                                  struct inode_record *rec,
2063                                  struct inode_backref *backref)
2064 {
2065         struct btrfs_path *path;
2066         struct btrfs_trans_handle *trans;
2067         struct btrfs_dir_item *dir_item;
2068         struct extent_buffer *leaf;
2069         struct btrfs_key key;
2070         struct btrfs_disk_key disk_key;
2071         struct inode_record *dir_rec;
2072         unsigned long name_ptr;
2073         u32 data_size = sizeof(*dir_item) + backref->namelen;
2074         int ret;
2075
2076         path = btrfs_alloc_path();
2077         if (!path)
2078                 return -ENOMEM;
2079
2080         trans = btrfs_start_transaction(root, 1);
2081         if (IS_ERR(trans)) {
2082                 btrfs_free_path(path);
2083                 return PTR_ERR(trans);
2084         }
2085
2086         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2087                 (unsigned long long)rec->ino);
2088         key.objectid = backref->dir;
2089         key.type = BTRFS_DIR_INDEX_KEY;
2090         key.offset = backref->index;
2091
2092         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2093         BUG_ON(ret);
2094
2095         leaf = path->nodes[0];
2096         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2097
2098         disk_key.objectid = cpu_to_le64(rec->ino);
2099         disk_key.type = BTRFS_INODE_ITEM_KEY;
2100         disk_key.offset = 0;
2101
2102         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2103         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2104         btrfs_set_dir_data_len(leaf, dir_item, 0);
2105         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2106         name_ptr = (unsigned long)(dir_item + 1);
2107         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2108         btrfs_mark_buffer_dirty(leaf);
2109         btrfs_free_path(path);
2110         btrfs_commit_transaction(trans, root);
2111
2112         backref->found_dir_index = 1;
2113         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2114         BUG_ON(IS_ERR(dir_rec));
2115         if (!dir_rec)
2116                 return 0;
2117         dir_rec->found_size += backref->namelen;
2118         if (dir_rec->found_size == dir_rec->isize &&
2119             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2120                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2121         if (dir_rec->found_size != dir_rec->isize)
2122                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2123
2124         return 0;
2125 }
2126
2127 static int delete_dir_index(struct btrfs_root *root,
2128                             struct cache_tree *inode_cache,
2129                             struct inode_record *rec,
2130                             struct inode_backref *backref)
2131 {
2132         struct btrfs_trans_handle *trans;
2133         struct btrfs_dir_item *di;
2134         struct btrfs_path *path;
2135         int ret = 0;
2136
2137         path = btrfs_alloc_path();
2138         if (!path)
2139                 return -ENOMEM;
2140
2141         trans = btrfs_start_transaction(root, 1);
2142         if (IS_ERR(trans)) {
2143                 btrfs_free_path(path);
2144                 return PTR_ERR(trans);
2145         }
2146
2147
2148         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2149                 (unsigned long long)backref->dir,
2150                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2151                 (unsigned long long)root->objectid);
2152
2153         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2154                                     backref->name, backref->namelen,
2155                                     backref->index, -1);
2156         if (IS_ERR(di)) {
2157                 ret = PTR_ERR(di);
2158                 btrfs_free_path(path);
2159                 btrfs_commit_transaction(trans, root);
2160                 if (ret == -ENOENT)
2161                         return 0;
2162                 return ret;
2163         }
2164
2165         if (!di)
2166                 ret = btrfs_del_item(trans, root, path);
2167         else
2168                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2169         BUG_ON(ret);
2170         btrfs_free_path(path);
2171         btrfs_commit_transaction(trans, root);
2172         return ret;
2173 }
2174
2175 static int create_inode_item(struct btrfs_root *root,
2176                              struct inode_record *rec,
2177                              struct inode_backref *backref, int root_dir)
2178 {
2179         struct btrfs_trans_handle *trans;
2180         struct btrfs_inode_item inode_item;
2181         time_t now = time(NULL);
2182         int ret;
2183
2184         trans = btrfs_start_transaction(root, 1);
2185         if (IS_ERR(trans)) {
2186                 ret = PTR_ERR(trans);
2187                 return ret;
2188         }
2189
2190         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2191                 "be incomplete, please check permissions and content after "
2192                 "the fsck completes.\n", (unsigned long long)root->objectid,
2193                 (unsigned long long)rec->ino);
2194
2195         memset(&inode_item, 0, sizeof(inode_item));
2196         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2197         if (root_dir)
2198                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2199         else
2200                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2201         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2202         if (rec->found_dir_item) {
2203                 if (rec->found_file_extent)
2204                         fprintf(stderr, "root %llu inode %llu has both a dir "
2205                                 "item and extents, unsure if it is a dir or a "
2206                                 "regular file so setting it as a directory\n",
2207                                 (unsigned long long)root->objectid,
2208                                 (unsigned long long)rec->ino);
2209                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2210                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2211         } else if (!rec->found_dir_item) {
2212                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2213                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2214         }
2215         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2216         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2217         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2218         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2219         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2220         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2221         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2222         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2223
2224         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2225         BUG_ON(ret);
2226         btrfs_commit_transaction(trans, root);
2227         return 0;
2228 }
2229
2230 static int repair_inode_backrefs(struct btrfs_root *root,
2231                                  struct inode_record *rec,
2232                                  struct cache_tree *inode_cache,
2233                                  int delete)
2234 {
2235         struct inode_backref *tmp, *backref;
2236         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2237         int ret = 0;
2238         int repaired = 0;
2239
2240         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2241                 if (!delete && rec->ino == root_dirid) {
2242                         if (!rec->found_inode_item) {
2243                                 ret = create_inode_item(root, rec, backref, 1);
2244                                 if (ret)
2245                                         break;
2246                                 repaired++;
2247                         }
2248                 }
2249
2250                 /* Index 0 for root dir's are special, don't mess with it */
2251                 if (rec->ino == root_dirid && backref->index == 0)
2252                         continue;
2253
2254                 if (delete &&
2255                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2256                      (backref->found_dir_index && backref->found_inode_ref &&
2257                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2258                         ret = delete_dir_index(root, inode_cache, rec, backref);
2259                         if (ret)
2260                                 break;
2261                         repaired++;
2262                         list_del(&backref->list);
2263                         free(backref);
2264                 }
2265
2266                 if (!delete && !backref->found_dir_index &&
2267                     backref->found_dir_item && backref->found_inode_ref) {
2268                         ret = add_missing_dir_index(root, inode_cache, rec,
2269                                                     backref);
2270                         if (ret)
2271                                 break;
2272                         repaired++;
2273                         if (backref->found_dir_item &&
2274                             backref->found_dir_index &&
2275                             backref->found_dir_index) {
2276                                 if (!backref->errors &&
2277                                     backref->found_inode_ref) {
2278                                         list_del(&backref->list);
2279                                         free(backref);
2280                                 }
2281                         }
2282                 }
2283
2284                 if (!delete && (!backref->found_dir_index &&
2285                                 !backref->found_dir_item &&
2286                                 backref->found_inode_ref)) {
2287                         struct btrfs_trans_handle *trans;
2288                         struct btrfs_key location;
2289
2290                         ret = check_dir_conflict(root, backref->name,
2291                                                  backref->namelen,
2292                                                  backref->dir,
2293                                                  backref->index);
2294                         if (ret) {
2295                                 /*
2296                                  * let nlink fixing routine to handle it,
2297                                  * which can do it better.
2298                                  */
2299                                 ret = 0;
2300                                 break;
2301                         }
2302                         location.objectid = rec->ino;
2303                         location.type = BTRFS_INODE_ITEM_KEY;
2304                         location.offset = 0;
2305
2306                         trans = btrfs_start_transaction(root, 1);
2307                         if (IS_ERR(trans)) {
2308                                 ret = PTR_ERR(trans);
2309                                 break;
2310                         }
2311                         fprintf(stderr, "adding missing dir index/item pair "
2312                                 "for inode %llu\n",
2313                                 (unsigned long long)rec->ino);
2314                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2315                                                     backref->namelen,
2316                                                     backref->dir, &location,
2317                                                     imode_to_type(rec->imode),
2318                                                     backref->index);
2319                         BUG_ON(ret);
2320                         btrfs_commit_transaction(trans, root);
2321                         repaired++;
2322                 }
2323
2324                 if (!delete && (backref->found_inode_ref &&
2325                                 backref->found_dir_index &&
2326                                 backref->found_dir_item &&
2327                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2328                                 !rec->found_inode_item)) {
2329                         ret = create_inode_item(root, rec, backref, 0);
2330                         if (ret)
2331                                 break;
2332                         repaired++;
2333                 }
2334
2335         }
2336         return ret ? ret : repaired;
2337 }
2338
2339 /*
2340  * To determine the file type for nlink/inode_item repair
2341  *
2342  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2343  * Return -ENOENT if file type is not found.
2344  */
2345 static int find_file_type(struct inode_record *rec, u8 *type)
2346 {
2347         struct inode_backref *backref;
2348
2349         /* For inode item recovered case */
2350         if (rec->found_inode_item) {
2351                 *type = imode_to_type(rec->imode);
2352                 return 0;
2353         }
2354
2355         list_for_each_entry(backref, &rec->backrefs, list) {
2356                 if (backref->found_dir_index || backref->found_dir_item) {
2357                         *type = backref->filetype;
2358                         return 0;
2359                 }
2360         }
2361         return -ENOENT;
2362 }
2363
2364 /*
2365  * To determine the file name for nlink repair
2366  *
2367  * Return 0 if file name is found, set name and namelen.
2368  * Return -ENOENT if file name is not found.
2369  */
2370 static int find_file_name(struct inode_record *rec,
2371                           char *name, int *namelen)
2372 {
2373         struct inode_backref *backref;
2374
2375         list_for_each_entry(backref, &rec->backrefs, list) {
2376                 if (backref->found_dir_index || backref->found_dir_item ||
2377                     backref->found_inode_ref) {
2378                         memcpy(name, backref->name, backref->namelen);
2379                         *namelen = backref->namelen;
2380                         return 0;
2381                 }
2382         }
2383         return -ENOENT;
2384 }
2385
2386 /* Reset the nlink of the inode to the correct one */
2387 static int reset_nlink(struct btrfs_trans_handle *trans,
2388                        struct btrfs_root *root,
2389                        struct btrfs_path *path,
2390                        struct inode_record *rec)
2391 {
2392         struct inode_backref *backref;
2393         struct inode_backref *tmp;
2394         struct btrfs_key key;
2395         struct btrfs_inode_item *inode_item;
2396         int ret = 0;
2397
2398         /* We don't believe this either, reset it and iterate backref */
2399         rec->found_link = 0;
2400
2401         /* Remove all backref including the valid ones */
2402         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2403                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2404                                    backref->index, backref->name,
2405                                    backref->namelen, 0);
2406                 if (ret < 0)
2407                         goto out;
2408
2409                 /* remove invalid backref, so it won't be added back */
2410                 if (!(backref->found_dir_index &&
2411                       backref->found_dir_item &&
2412                       backref->found_inode_ref)) {
2413                         list_del(&backref->list);
2414                         free(backref);
2415                 } else {
2416                         rec->found_link++;
2417                 }
2418         }
2419
2420         /* Set nlink to 0 */
2421         key.objectid = rec->ino;
2422         key.type = BTRFS_INODE_ITEM_KEY;
2423         key.offset = 0;
2424         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2425         if (ret < 0)
2426                 goto out;
2427         if (ret > 0) {
2428                 ret = -ENOENT;
2429                 goto out;
2430         }
2431         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2432                                     struct btrfs_inode_item);
2433         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2434         btrfs_mark_buffer_dirty(path->nodes[0]);
2435         btrfs_release_path(path);
2436
2437         /*
2438          * Add back valid inode_ref/dir_item/dir_index,
2439          * add_link() will handle the nlink inc, so new nlink must be correct
2440          */
2441         list_for_each_entry(backref, &rec->backrefs, list) {
2442                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2443                                      backref->name, backref->namelen,
2444                                      backref->filetype, &backref->index, 1);
2445                 if (ret < 0)
2446                         goto out;
2447         }
2448 out:
2449         btrfs_release_path(path);
2450         return ret;
2451 }
2452
2453 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2454                                struct btrfs_root *root,
2455                                struct btrfs_path *path,
2456                                struct inode_record *rec)
2457 {
2458         char *dir_name = "lost+found";
2459         char namebuf[BTRFS_NAME_LEN] = {0};
2460         u64 lost_found_ino;
2461         u32 mode = 0700;
2462         u8 type = 0;
2463         int namelen = 0;
2464         int name_recovered = 0;
2465         int type_recovered = 0;
2466         int ret = 0;
2467
2468         /*
2469          * Get file name and type first before these invalid inode ref
2470          * are deleted by remove_all_invalid_backref()
2471          */
2472         name_recovered = !find_file_name(rec, namebuf, &namelen);
2473         type_recovered = !find_file_type(rec, &type);
2474
2475         if (!name_recovered) {
2476                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2477                        rec->ino, rec->ino);
2478                 namelen = count_digits(rec->ino);
2479                 sprintf(namebuf, "%llu", rec->ino);
2480                 name_recovered = 1;
2481         }
2482         if (!type_recovered) {
2483                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2484                        rec->ino);
2485                 type = BTRFS_FT_REG_FILE;
2486                 type_recovered = 1;
2487         }
2488
2489         ret = reset_nlink(trans, root, path, rec);
2490         if (ret < 0) {
2491                 fprintf(stderr,
2492                         "Failed to reset nlink for inode %llu: %s\n",
2493                         rec->ino, strerror(-ret));
2494                 goto out;
2495         }
2496
2497         if (rec->found_link == 0) {
2498                 lost_found_ino = root->highest_inode;
2499                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2500                         ret = -EOVERFLOW;
2501                         goto out;
2502                 }
2503                 lost_found_ino++;
2504                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2505                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2506                                   mode);
2507                 if (ret < 0) {
2508                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2509                                 dir_name, strerror(-ret));
2510                         goto out;
2511                 }
2512                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2513                                      namebuf, namelen, type, NULL, 1);
2514                 /*
2515                  * Add ".INO" suffix several times to handle case where
2516                  * "FILENAME.INO" is already taken by another file.
2517                  */
2518                 while (ret == -EEXIST) {
2519                         /*
2520                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2521                          */
2522                         if (namelen + count_digits(rec->ino) + 1 >
2523                             BTRFS_NAME_LEN) {
2524                                 ret = -EFBIG;
2525                                 goto out;
2526                         }
2527                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2528                                  ".%llu", rec->ino);
2529                         namelen += count_digits(rec->ino) + 1;
2530                         ret = btrfs_add_link(trans, root, rec->ino,
2531                                              lost_found_ino, namebuf,
2532                                              namelen, type, NULL, 1);
2533                 }
2534                 if (ret < 0) {
2535                         fprintf(stderr,
2536                                 "Failed to link the inode %llu to %s dir: %s\n",
2537                                 rec->ino, dir_name, strerror(-ret));
2538                         goto out;
2539                 }
2540                 /*
2541                  * Just increase the found_link, don't actually add the
2542                  * backref. This will make things easier and this inode
2543                  * record will be freed after the repair is done.
2544                  * So fsck will not report problem about this inode.
2545                  */
2546                 rec->found_link++;
2547                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2548                        namelen, namebuf, dir_name);
2549         }
2550         printf("Fixed the nlink of inode %llu\n", rec->ino);
2551 out:
2552         /*
2553          * Clear the flag anyway, or we will loop forever for the same inode
2554          * as it will not be removed from the bad inode list and the dead loop
2555          * happens.
2556          */
2557         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2558         btrfs_release_path(path);
2559         return ret;
2560 }
2561
2562 /*
2563  * Check if there is any normal(reg or prealloc) file extent for given
2564  * ino.
2565  * This is used to determine the file type when neither its dir_index/item or
2566  * inode_item exists.
2567  *
2568  * This will *NOT* report error, if any error happens, just consider it does
2569  * not have any normal file extent.
2570  */
2571 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2572 {
2573         struct btrfs_path *path;
2574         struct btrfs_key key;
2575         struct btrfs_key found_key;
2576         struct btrfs_file_extent_item *fi;
2577         u8 type;
2578         int ret = 0;
2579
2580         path = btrfs_alloc_path();
2581         if (!path)
2582                 goto out;
2583         key.objectid = ino;
2584         key.type = BTRFS_EXTENT_DATA_KEY;
2585         key.offset = 0;
2586
2587         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2588         if (ret < 0) {
2589                 ret = 0;
2590                 goto out;
2591         }
2592         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2593                 ret = btrfs_next_leaf(root, path);
2594                 if (ret) {
2595                         ret = 0;
2596                         goto out;
2597                 }
2598         }
2599         while (1) {
2600                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2601                                       path->slots[0]);
2602                 if (found_key.objectid != ino ||
2603                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2604                         break;
2605                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2606                                     struct btrfs_file_extent_item);
2607                 type = btrfs_file_extent_type(path->nodes[0], fi);
2608                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2609                         ret = 1;
2610                         goto out;
2611                 }
2612         }
2613 out:
2614         btrfs_free_path(path);
2615         return ret;
2616 }
2617
2618 static u32 btrfs_type_to_imode(u8 type)
2619 {
2620         static u32 imode_by_btrfs_type[] = {
2621                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2622                 [BTRFS_FT_DIR]          = S_IFDIR,
2623                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2624                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2625                 [BTRFS_FT_FIFO]         = S_IFIFO,
2626                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2627                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2628         };
2629
2630         return imode_by_btrfs_type[(type)];
2631 }
2632
2633 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2634                                 struct btrfs_root *root,
2635                                 struct btrfs_path *path,
2636                                 struct inode_record *rec)
2637 {
2638         u8 filetype;
2639         u32 mode = 0700;
2640         int type_recovered = 0;
2641         int ret = 0;
2642
2643         printf("Trying to rebuild inode:%llu\n", rec->ino);
2644
2645         type_recovered = !find_file_type(rec, &filetype);
2646
2647         /*
2648          * Try to determine inode type if type not found.
2649          *
2650          * For found regular file extent, it must be FILE.
2651          * For found dir_item/index, it must be DIR.
2652          *
2653          * For undetermined one, use FILE as fallback.
2654          *
2655          * TODO:
2656          * 1. If found backref(inode_index/item is already handled) to it,
2657          *    it must be DIR.
2658          *    Need new inode-inode ref structure to allow search for that.
2659          */
2660         if (!type_recovered) {
2661                 if (rec->found_file_extent &&
2662                     find_normal_file_extent(root, rec->ino)) {
2663                         type_recovered = 1;
2664                         filetype = BTRFS_FT_REG_FILE;
2665                 } else if (rec->found_dir_item) {
2666                         type_recovered = 1;
2667                         filetype = BTRFS_FT_DIR;
2668                 } else if (!list_empty(&rec->orphan_extents)) {
2669                         type_recovered = 1;
2670                         filetype = BTRFS_FT_REG_FILE;
2671                 } else{
2672                         printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2673                                rec->ino);
2674                         type_recovered = 1;
2675                         filetype = BTRFS_FT_REG_FILE;
2676                 }
2677         }
2678
2679         ret = btrfs_new_inode(trans, root, rec->ino,
2680                               mode | btrfs_type_to_imode(filetype));
2681         if (ret < 0)
2682                 goto out;
2683
2684         /*
2685          * Here inode rebuild is done, we only rebuild the inode item,
2686          * don't repair the nlink(like move to lost+found).
2687          * That is the job of nlink repair.
2688          *
2689          * We just fill the record and return
2690          */
2691         rec->found_dir_item = 1;
2692         rec->imode = mode | btrfs_type_to_imode(filetype);
2693         rec->nlink = 0;
2694         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2695         /* Ensure the inode_nlinks repair function will be called */
2696         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2697 out:
2698         return ret;
2699 }
2700
2701 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2702                                       struct btrfs_root *root,
2703                                       struct btrfs_path *path,
2704                                       struct inode_record *rec)
2705 {
2706         struct orphan_data_extent *orphan;
2707         struct orphan_data_extent *tmp;
2708         int ret = 0;
2709
2710         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2711                 /*
2712                  * Check for conflicting file extents
2713                  *
2714                  * Here we don't know whether the extents is compressed or not,
2715                  * so we can only assume it not compressed nor data offset,
2716                  * and use its disk_len as extent length.
2717                  */
2718                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2719                                        orphan->offset, orphan->disk_len, 0);
2720                 btrfs_release_path(path);
2721                 if (ret < 0)
2722                         goto out;
2723                 if (!ret) {
2724                         fprintf(stderr,
2725                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2726                                 orphan->disk_bytenr, orphan->disk_len);
2727                         ret = btrfs_free_extent(trans,
2728                                         root->fs_info->extent_root,
2729                                         orphan->disk_bytenr, orphan->disk_len,
2730                                         0, root->objectid, orphan->objectid,
2731                                         orphan->offset);
2732                         if (ret < 0)
2733                                 goto out;
2734                 }
2735                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2736                                 orphan->offset, orphan->disk_bytenr,
2737                                 orphan->disk_len, orphan->disk_len);
2738                 if (ret < 0)
2739                         goto out;
2740
2741                 /* Update file size info */
2742                 rec->found_size += orphan->disk_len;
2743                 if (rec->found_size == rec->nbytes)
2744                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2745
2746                 /* Update the file extent hole info too */
2747                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2748                                            orphan->disk_len);
2749                 if (ret < 0)
2750                         goto out;
2751                 if (RB_EMPTY_ROOT(&rec->holes))
2752                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2753
2754                 list_del(&orphan->list);
2755                 free(orphan);
2756         }
2757         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2758 out:
2759         return ret;
2760 }
2761
2762 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2763                                         struct btrfs_root *root,
2764                                         struct btrfs_path *path,
2765                                         struct inode_record *rec)
2766 {
2767         struct rb_node *node;
2768         struct file_extent_hole *hole;
2769         int found = 0;
2770         int ret = 0;
2771
2772         node = rb_first(&rec->holes);
2773
2774         while (node) {
2775                 found = 1;
2776                 hole = rb_entry(node, struct file_extent_hole, node);
2777                 ret = btrfs_punch_hole(trans, root, rec->ino,
2778                                        hole->start, hole->len);
2779                 if (ret < 0)
2780                         goto out;
2781                 ret = del_file_extent_hole(&rec->holes, hole->start,
2782                                            hole->len);
2783                 if (ret < 0)
2784                         goto out;
2785                 if (RB_EMPTY_ROOT(&rec->holes))
2786                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2787                 node = rb_first(&rec->holes);
2788         }
2789         /* special case for a file losing all its file extent */
2790         if (!found) {
2791                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2792                                        round_up(rec->isize, root->sectorsize));
2793                 if (ret < 0)
2794                         goto out;
2795         }
2796         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2797                rec->ino, root->objectid);
2798 out:
2799         return ret;
2800 }
2801
2802 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2803 {
2804         struct btrfs_trans_handle *trans;
2805         struct btrfs_path *path;
2806         int ret = 0;
2807
2808         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2809                              I_ERR_NO_ORPHAN_ITEM |
2810                              I_ERR_LINK_COUNT_WRONG |
2811                              I_ERR_NO_INODE_ITEM |
2812                              I_ERR_FILE_EXTENT_ORPHAN |
2813                              I_ERR_FILE_EXTENT_DISCOUNT|
2814                              I_ERR_FILE_NBYTES_WRONG)))
2815                 return rec->errors;
2816
2817         path = btrfs_alloc_path();
2818         if (!path)
2819                 return -ENOMEM;
2820
2821         /*
2822          * For nlink repair, it may create a dir and add link, so
2823          * 2 for parent(256)'s dir_index and dir_item
2824          * 2 for lost+found dir's inode_item and inode_ref
2825          * 1 for the new inode_ref of the file
2826          * 2 for lost+found dir's dir_index and dir_item for the file
2827          */
2828         trans = btrfs_start_transaction(root, 7);
2829         if (IS_ERR(trans)) {
2830                 btrfs_free_path(path);
2831                 return PTR_ERR(trans);
2832         }
2833
2834         if (rec->errors & I_ERR_NO_INODE_ITEM)
2835                 ret = repair_inode_no_item(trans, root, path, rec);
2836         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2837                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2838         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2839                 ret = repair_inode_discount_extent(trans, root, path, rec);
2840         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2841                 ret = repair_inode_isize(trans, root, path, rec);
2842         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2843                 ret = repair_inode_orphan_item(trans, root, path, rec);
2844         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2845                 ret = repair_inode_nlinks(trans, root, path, rec);
2846         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2847                 ret = repair_inode_nbytes(trans, root, path, rec);
2848         btrfs_commit_transaction(trans, root);
2849         btrfs_free_path(path);
2850         return ret;
2851 }
2852
2853 static int check_inode_recs(struct btrfs_root *root,
2854                             struct cache_tree *inode_cache)
2855 {
2856         struct cache_extent *cache;
2857         struct ptr_node *node;
2858         struct inode_record *rec;
2859         struct inode_backref *backref;
2860         int stage = 0;
2861         int ret = 0;
2862         int err = 0;
2863         u64 error = 0;
2864         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2865
2866         if (btrfs_root_refs(&root->root_item) == 0) {
2867                 if (!cache_tree_empty(inode_cache))
2868                         fprintf(stderr, "warning line %d\n", __LINE__);
2869                 return 0;
2870         }
2871
2872         /*
2873          * We need to record the highest inode number for later 'lost+found'
2874          * dir creation.
2875          * We must select a ino not used/refered by any existing inode, or
2876          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2877          * this may cause 'lost+found' dir has wrong nlinks.
2878          */
2879         cache = last_cache_extent(inode_cache);
2880         if (cache) {
2881                 node = container_of(cache, struct ptr_node, cache);
2882                 rec = node->data;
2883                 if (rec->ino > root->highest_inode)
2884                         root->highest_inode = rec->ino;
2885         }
2886
2887         /*
2888          * We need to repair backrefs first because we could change some of the
2889          * errors in the inode recs.
2890          *
2891          * We also need to go through and delete invalid backrefs first and then
2892          * add the correct ones second.  We do this because we may get EEXIST
2893          * when adding back the correct index because we hadn't yet deleted the
2894          * invalid index.
2895          *
2896          * For example, if we were missing a dir index then the directories
2897          * isize would be wrong, so if we fixed the isize to what we thought it
2898          * would be and then fixed the backref we'd still have a invalid fs, so
2899          * we need to add back the dir index and then check to see if the isize
2900          * is still wrong.
2901          */
2902         while (stage < 3) {
2903                 stage++;
2904                 if (stage == 3 && !err)
2905                         break;
2906
2907                 cache = search_cache_extent(inode_cache, 0);
2908                 while (repair && cache) {
2909                         node = container_of(cache, struct ptr_node, cache);
2910                         rec = node->data;
2911                         cache = next_cache_extent(cache);
2912
2913                         /* Need to free everything up and rescan */
2914                         if (stage == 3) {
2915                                 remove_cache_extent(inode_cache, &node->cache);
2916                                 free(node);
2917                                 free_inode_rec(rec);
2918                                 continue;
2919                         }
2920
2921                         if (list_empty(&rec->backrefs))
2922                                 continue;
2923
2924                         ret = repair_inode_backrefs(root, rec, inode_cache,
2925                                                     stage == 1);
2926                         if (ret < 0) {
2927                                 err = ret;
2928                                 stage = 2;
2929                                 break;
2930                         } if (ret > 0) {
2931                                 err = -EAGAIN;
2932                         }
2933                 }
2934         }
2935         if (err)
2936                 return err;
2937
2938         rec = get_inode_rec(inode_cache, root_dirid, 0);
2939         BUG_ON(IS_ERR(rec));
2940         if (rec) {
2941                 ret = check_root_dir(rec);
2942                 if (ret) {
2943                         fprintf(stderr, "root %llu root dir %llu error\n",
2944                                 (unsigned long long)root->root_key.objectid,
2945                                 (unsigned long long)root_dirid);
2946                         print_inode_error(root, rec);
2947                         error++;
2948                 }
2949         } else {
2950                 if (repair) {
2951                         struct btrfs_trans_handle *trans;
2952
2953                         trans = btrfs_start_transaction(root, 1);
2954                         if (IS_ERR(trans)) {
2955                                 err = PTR_ERR(trans);
2956                                 return err;
2957                         }
2958
2959                         fprintf(stderr,
2960                                 "root %llu missing its root dir, recreating\n",
2961                                 (unsigned long long)root->objectid);
2962
2963                         ret = btrfs_make_root_dir(trans, root, root_dirid);
2964                         BUG_ON(ret);
2965
2966                         btrfs_commit_transaction(trans, root);
2967                         return -EAGAIN;
2968                 }
2969
2970                 fprintf(stderr, "root %llu root dir %llu not found\n",
2971                         (unsigned long long)root->root_key.objectid,
2972                         (unsigned long long)root_dirid);
2973         }
2974
2975         while (1) {
2976                 cache = search_cache_extent(inode_cache, 0);
2977                 if (!cache)
2978                         break;
2979                 node = container_of(cache, struct ptr_node, cache);
2980                 rec = node->data;
2981                 remove_cache_extent(inode_cache, &node->cache);
2982                 free(node);
2983                 if (rec->ino == root_dirid ||
2984                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
2985                         free_inode_rec(rec);
2986                         continue;
2987                 }
2988
2989                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2990                         ret = check_orphan_item(root, rec->ino);
2991                         if (ret == 0)
2992                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2993                         if (can_free_inode_rec(rec)) {
2994                                 free_inode_rec(rec);
2995                                 continue;
2996                         }
2997                 }
2998
2999                 if (!rec->found_inode_item)
3000                         rec->errors |= I_ERR_NO_INODE_ITEM;
3001                 if (rec->found_link != rec->nlink)
3002                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3003                 if (repair) {
3004                         ret = try_repair_inode(root, rec);
3005                         if (ret == 0 && can_free_inode_rec(rec)) {
3006                                 free_inode_rec(rec);
3007                                 continue;
3008                         }
3009                         ret = 0;
3010                 }
3011
3012                 if (!(repair && ret == 0))
3013                         error++;
3014                 print_inode_error(root, rec);
3015                 list_for_each_entry(backref, &rec->backrefs, list) {
3016                         if (!backref->found_dir_item)
3017                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3018                         if (!backref->found_dir_index)
3019                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3020                         if (!backref->found_inode_ref)
3021                                 backref->errors |= REF_ERR_NO_INODE_REF;
3022                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3023                                 " namelen %u name %s filetype %d errors %x",
3024                                 (unsigned long long)backref->dir,
3025                                 (unsigned long long)backref->index,
3026                                 backref->namelen, backref->name,
3027                                 backref->filetype, backref->errors);
3028                         print_ref_error(backref->errors);
3029                 }
3030                 free_inode_rec(rec);
3031         }
3032         return (error > 0) ? -1 : 0;
3033 }
3034
3035 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3036                                         u64 objectid)
3037 {
3038         struct cache_extent *cache;
3039         struct root_record *rec = NULL;
3040         int ret;
3041
3042         cache = lookup_cache_extent(root_cache, objectid, 1);
3043         if (cache) {
3044                 rec = container_of(cache, struct root_record, cache);
3045         } else {
3046                 rec = calloc(1, sizeof(*rec));
3047                 if (!rec)
3048                         return ERR_PTR(-ENOMEM);
3049                 rec->objectid = objectid;
3050                 INIT_LIST_HEAD(&rec->backrefs);
3051                 rec->cache.start = objectid;
3052                 rec->cache.size = 1;
3053
3054                 ret = insert_cache_extent(root_cache, &rec->cache);
3055                 if (ret)
3056                         return ERR_PTR(-EEXIST);
3057         }
3058         return rec;
3059 }
3060
3061 static struct root_backref *get_root_backref(struct root_record *rec,
3062                                              u64 ref_root, u64 dir, u64 index,
3063                                              const char *name, int namelen)
3064 {
3065         struct root_backref *backref;
3066
3067         list_for_each_entry(backref, &rec->backrefs, list) {
3068                 if (backref->ref_root != ref_root || backref->dir != dir ||
3069                     backref->namelen != namelen)
3070                         continue;
3071                 if (memcmp(name, backref->name, namelen))
3072                         continue;
3073                 return backref;
3074         }
3075
3076         backref = calloc(1, sizeof(*backref) + namelen + 1);
3077         if (!backref)
3078                 return NULL;
3079         backref->ref_root = ref_root;
3080         backref->dir = dir;
3081         backref->index = index;
3082         backref->namelen = namelen;
3083         memcpy(backref->name, name, namelen);
3084         backref->name[namelen] = '\0';
3085         list_add_tail(&backref->list, &rec->backrefs);
3086         return backref;
3087 }
3088
3089 static void free_root_record(struct cache_extent *cache)
3090 {
3091         struct root_record *rec;
3092         struct root_backref *backref;
3093
3094         rec = container_of(cache, struct root_record, cache);
3095         while (!list_empty(&rec->backrefs)) {
3096                 backref = list_entry(rec->backrefs.next,
3097                                      struct root_backref, list);
3098                 list_del(&backref->list);
3099                 free(backref);
3100         }
3101
3102         kfree(rec);
3103 }
3104
3105 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3106
3107 static int add_root_backref(struct cache_tree *root_cache,
3108                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3109                             const char *name, int namelen,
3110                             int item_type, int errors)
3111 {
3112         struct root_record *rec;
3113         struct root_backref *backref;
3114
3115         rec = get_root_rec(root_cache, root_id);
3116         BUG_ON(IS_ERR(rec));
3117         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3118         BUG_ON(!backref);
3119
3120         backref->errors |= errors;
3121
3122         if (item_type != BTRFS_DIR_ITEM_KEY) {
3123                 if (backref->found_dir_index || backref->found_back_ref ||
3124                     backref->found_forward_ref) {
3125                         if (backref->index != index)
3126                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3127                 } else {
3128                         backref->index = index;
3129                 }
3130         }
3131
3132         if (item_type == BTRFS_DIR_ITEM_KEY) {
3133                 if (backref->found_forward_ref)
3134                         rec->found_ref++;
3135                 backref->found_dir_item = 1;
3136         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3137                 backref->found_dir_index = 1;
3138         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3139                 if (backref->found_forward_ref)
3140                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3141                 else if (backref->found_dir_item)
3142                         rec->found_ref++;
3143                 backref->found_forward_ref = 1;
3144         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3145                 if (backref->found_back_ref)
3146                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3147                 backref->found_back_ref = 1;
3148         } else {
3149                 BUG_ON(1);
3150         }
3151
3152         if (backref->found_forward_ref && backref->found_dir_item)
3153                 backref->reachable = 1;
3154         return 0;
3155 }
3156
3157 static int merge_root_recs(struct btrfs_root *root,
3158                            struct cache_tree *src_cache,
3159                            struct cache_tree *dst_cache)
3160 {
3161         struct cache_extent *cache;
3162         struct ptr_node *node;
3163         struct inode_record *rec;
3164         struct inode_backref *backref;
3165         int ret = 0;
3166
3167         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3168                 free_inode_recs_tree(src_cache);
3169                 return 0;
3170         }
3171
3172         while (1) {
3173                 cache = search_cache_extent(src_cache, 0);
3174                 if (!cache)
3175                         break;
3176                 node = container_of(cache, struct ptr_node, cache);
3177                 rec = node->data;
3178                 remove_cache_extent(src_cache, &node->cache);
3179                 free(node);
3180
3181                 ret = is_child_root(root, root->objectid, rec->ino);
3182                 if (ret < 0)
3183                         break;
3184                 else if (ret == 0)
3185                         goto skip;
3186
3187                 list_for_each_entry(backref, &rec->backrefs, list) {
3188                         BUG_ON(backref->found_inode_ref);
3189                         if (backref->found_dir_item)
3190                                 add_root_backref(dst_cache, rec->ino,
3191                                         root->root_key.objectid, backref->dir,
3192                                         backref->index, backref->name,
3193                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3194                                         backref->errors);
3195                         if (backref->found_dir_index)
3196                                 add_root_backref(dst_cache, rec->ino,
3197                                         root->root_key.objectid, backref->dir,
3198                                         backref->index, backref->name,
3199                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3200                                         backref->errors);
3201                 }
3202 skip:
3203                 free_inode_rec(rec);
3204         }
3205         if (ret < 0)
3206                 return ret;
3207         return 0;
3208 }
3209
3210 static int check_root_refs(struct btrfs_root *root,
3211                            struct cache_tree *root_cache)
3212 {
3213         struct root_record *rec;
3214         struct root_record *ref_root;
3215         struct root_backref *backref;
3216         struct cache_extent *cache;
3217         int loop = 1;
3218         int ret;
3219         int error;
3220         int errors = 0;
3221
3222         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3223         BUG_ON(IS_ERR(rec));
3224         rec->found_ref = 1;
3225
3226         /* fixme: this can not detect circular references */
3227         while (loop) {
3228                 loop = 0;
3229                 cache = search_cache_extent(root_cache, 0);
3230                 while (1) {
3231                         if (!cache)
3232                                 break;
3233                         rec = container_of(cache, struct root_record, cache);
3234                         cache = next_cache_extent(cache);
3235
3236                         if (rec->found_ref == 0)
3237                                 continue;
3238
3239                         list_for_each_entry(backref, &rec->backrefs, list) {
3240                                 if (!backref->reachable)
3241                                         continue;
3242
3243                                 ref_root = get_root_rec(root_cache,
3244                                                         backref->ref_root);
3245                                 BUG_ON(IS_ERR(ref_root));
3246                                 if (ref_root->found_ref > 0)
3247                                         continue;
3248
3249                                 backref->reachable = 0;
3250                                 rec->found_ref--;
3251                                 if (rec->found_ref == 0)
3252                                         loop = 1;
3253                         }
3254                 }
3255         }
3256
3257         cache = search_cache_extent(root_cache, 0);
3258         while (1) {
3259                 if (!cache)
3260                         break;
3261                 rec = container_of(cache, struct root_record, cache);
3262                 cache = next_cache_extent(cache);
3263
3264                 if (rec->found_ref == 0 &&
3265                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3266                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3267                         ret = check_orphan_item(root->fs_info->tree_root,
3268                                                 rec->objectid);
3269                         if (ret == 0)
3270                                 continue;
3271
3272                         /*
3273                          * If we don't have a root item then we likely just have
3274                          * a dir item in a snapshot for this root but no actual
3275                          * ref key or anything so it's meaningless.
3276                          */
3277                         if (!rec->found_root_item)
3278                                 continue;
3279                         errors++;
3280                         fprintf(stderr, "fs tree %llu not referenced\n",
3281                                 (unsigned long long)rec->objectid);
3282                 }
3283
3284                 error = 0;
3285                 if (rec->found_ref > 0 && !rec->found_root_item)
3286                         error = 1;
3287                 list_for_each_entry(backref, &rec->backrefs, list) {
3288                         if (!backref->found_dir_item)
3289                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3290                         if (!backref->found_dir_index)
3291                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3292                         if (!backref->found_back_ref)
3293                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3294                         if (!backref->found_forward_ref)
3295                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3296                         if (backref->reachable && backref->errors)
3297                                 error = 1;
3298                 }
3299                 if (!error)
3300                         continue;
3301
3302                 errors++;
3303                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3304                         (unsigned long long)rec->objectid, rec->found_ref,
3305                          rec->found_root_item ? "" : "not found");
3306
3307                 list_for_each_entry(backref, &rec->backrefs, list) {
3308                         if (!backref->reachable)
3309                                 continue;
3310                         if (!backref->errors && rec->found_root_item)
3311                                 continue;
3312                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3313                                 " index %llu namelen %u name %s errors %x\n",
3314                                 (unsigned long long)backref->ref_root,
3315                                 (unsigned long long)backref->dir,
3316                                 (unsigned long long)backref->index,
3317                                 backref->namelen, backref->name,
3318                                 backref->errors);
3319                         print_ref_error(backref->errors);
3320                 }
3321         }
3322         return errors > 0 ? 1 : 0;
3323 }
3324
3325 static int process_root_ref(struct extent_buffer *eb, int slot,
3326                             struct btrfs_key *key,
3327                             struct cache_tree *root_cache)
3328 {
3329         u64 dirid;
3330         u64 index;
3331         u32 len;
3332         u32 name_len;
3333         struct btrfs_root_ref *ref;
3334         char namebuf[BTRFS_NAME_LEN];
3335         int error;
3336
3337         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3338
3339         dirid = btrfs_root_ref_dirid(eb, ref);
3340         index = btrfs_root_ref_sequence(eb, ref);
3341         name_len = btrfs_root_ref_name_len(eb, ref);
3342
3343         if (name_len <= BTRFS_NAME_LEN) {
3344                 len = name_len;
3345                 error = 0;
3346         } else {
3347                 len = BTRFS_NAME_LEN;
3348                 error = REF_ERR_NAME_TOO_LONG;
3349         }
3350         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3351
3352         if (key->type == BTRFS_ROOT_REF_KEY) {
3353                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3354                                  index, namebuf, len, key->type, error);
3355         } else {
3356                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3357                                  index, namebuf, len, key->type, error);
3358         }
3359         return 0;
3360 }
3361
3362 static void free_corrupt_block(struct cache_extent *cache)
3363 {
3364         struct btrfs_corrupt_block *corrupt;
3365
3366         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3367         free(corrupt);
3368 }
3369
3370 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3371
3372 /*
3373  * Repair the btree of the given root.
3374  *
3375  * The fix is to remove the node key in corrupt_blocks cache_tree.
3376  * and rebalance the tree.
3377  * After the fix, the btree should be writeable.
3378  */
3379 static int repair_btree(struct btrfs_root *root,
3380                         struct cache_tree *corrupt_blocks)
3381 {
3382         struct btrfs_trans_handle *trans;
3383         struct btrfs_path *path;
3384         struct btrfs_corrupt_block *corrupt;
3385         struct cache_extent *cache;
3386         struct btrfs_key key;
3387         u64 offset;
3388         int level;
3389         int ret = 0;
3390
3391         if (cache_tree_empty(corrupt_blocks))
3392                 return 0;
3393
3394         path = btrfs_alloc_path();
3395         if (!path)
3396                 return -ENOMEM;
3397
3398         trans = btrfs_start_transaction(root, 1);
3399         if (IS_ERR(trans)) {
3400                 ret = PTR_ERR(trans);
3401                 fprintf(stderr, "Error starting transaction: %s\n",
3402                         strerror(-ret));
3403                 goto out_free_path;
3404         }
3405         cache = first_cache_extent(corrupt_blocks);
3406         while (cache) {
3407                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3408                                        cache);
3409                 level = corrupt->level;
3410                 path->lowest_level = level;
3411                 key.objectid = corrupt->key.objectid;
3412                 key.type = corrupt->key.type;
3413                 key.offset = corrupt->key.offset;
3414
3415                 /*
3416                  * Here we don't want to do any tree balance, since it may
3417                  * cause a balance with corrupted brother leaf/node,
3418                  * so ins_len set to 0 here.
3419                  * Balance will be done after all corrupt node/leaf is deleted.
3420                  */
3421                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3422                 if (ret < 0)
3423                         goto out;
3424                 offset = btrfs_node_blockptr(path->nodes[level],
3425                                              path->slots[level]);
3426
3427                 /* Remove the ptr */
3428                 ret = btrfs_del_ptr(trans, root, path, level,
3429                                     path->slots[level]);
3430                 if (ret < 0)
3431                         goto out;
3432                 /*
3433                  * Remove the corresponding extent
3434                  * return value is not concerned.
3435                  */
3436                 btrfs_release_path(path);
3437                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3438                                         0, root->root_key.objectid,
3439                                         level - 1, 0);
3440                 cache = next_cache_extent(cache);
3441         }
3442
3443         /* Balance the btree using btrfs_search_slot() */
3444         cache = first_cache_extent(corrupt_blocks);
3445         while (cache) {
3446                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3447                                        cache);
3448                 memcpy(&key, &corrupt->key, sizeof(key));
3449                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3450                 if (ret < 0)
3451                         goto out;
3452                 /* return will always >0 since it won't find the item */
3453                 ret = 0;
3454                 btrfs_release_path(path);
3455                 cache = next_cache_extent(cache);
3456         }
3457 out:
3458         btrfs_commit_transaction(trans, root);
3459 out_free_path:
3460         btrfs_free_path(path);
3461         return ret;
3462 }
3463
3464 static int check_fs_root(struct btrfs_root *root,
3465                          struct cache_tree *root_cache,
3466                          struct walk_control *wc)
3467 {
3468         int ret = 0;
3469         int err = 0;
3470         int wret;
3471         int level;
3472         struct btrfs_path path;
3473         struct shared_node root_node;
3474         struct root_record *rec;
3475         struct btrfs_root_item *root_item = &root->root_item;
3476         struct cache_tree corrupt_blocks;
3477         struct orphan_data_extent *orphan;
3478         struct orphan_data_extent *tmp;
3479         enum btrfs_tree_block_status status;
3480
3481         /*
3482          * Reuse the corrupt_block cache tree to record corrupted tree block
3483          *
3484          * Unlike the usage in extent tree check, here we do it in a per
3485          * fs/subvol tree base.
3486          */
3487         cache_tree_init(&corrupt_blocks);
3488         root->fs_info->corrupt_blocks = &corrupt_blocks;
3489
3490         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3491                 rec = get_root_rec(root_cache, root->root_key.objectid);
3492                 BUG_ON(IS_ERR(rec));
3493                 if (btrfs_root_refs(root_item) > 0)
3494                         rec->found_root_item = 1;
3495         }
3496
3497         btrfs_init_path(&path);
3498         memset(&root_node, 0, sizeof(root_node));
3499         cache_tree_init(&root_node.root_cache);
3500         cache_tree_init(&root_node.inode_cache);
3501
3502         /* Move the orphan extent record to corresponding inode_record */
3503         list_for_each_entry_safe(orphan, tmp,
3504                                  &root->orphan_data_extents, list) {
3505                 struct inode_record *inode;
3506
3507                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3508                                       1);
3509                 BUG_ON(IS_ERR(inode));
3510                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3511                 list_move(&orphan->list, &inode->orphan_extents);
3512         }
3513
3514         level = btrfs_header_level(root->node);
3515         memset(wc->nodes, 0, sizeof(wc->nodes));
3516         wc->nodes[level] = &root_node;
3517         wc->active_node = level;
3518         wc->root_level = level;
3519
3520         /* We may not have checked the root block, lets do that now */
3521         if (btrfs_is_leaf(root->node))
3522                 status = btrfs_check_leaf(root, NULL, root->node);
3523         else
3524                 status = btrfs_check_node(root, NULL, root->node);
3525         if (status != BTRFS_TREE_BLOCK_CLEAN)
3526                 return -EIO;
3527
3528         if (btrfs_root_refs(root_item) > 0 ||
3529             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3530                 path.nodes[level] = root->node;
3531                 extent_buffer_get(root->node);
3532                 path.slots[level] = 0;
3533         } else {
3534                 struct btrfs_key key;
3535                 struct btrfs_disk_key found_key;
3536
3537                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3538                 level = root_item->drop_level;
3539                 path.lowest_level = level;
3540                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3541                 if (wret < 0)
3542                         goto skip_walking;
3543                 btrfs_node_key(path.nodes[level], &found_key,
3544                                 path.slots[level]);
3545                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3546                                         sizeof(found_key)));
3547         }
3548
3549         while (1) {
3550                 wret = walk_down_tree(root, &path, wc, &level);
3551                 if (wret < 0)
3552                         ret = wret;
3553                 if (wret != 0)
3554                         break;
3555
3556                 wret = walk_up_tree(root, &path, wc, &level);
3557                 if (wret < 0)
3558                         ret = wret;
3559                 if (wret != 0)
3560                         break;
3561         }
3562 skip_walking:
3563         btrfs_release_path(&path);
3564
3565         if (!cache_tree_empty(&corrupt_blocks)) {
3566                 struct cache_extent *cache;
3567                 struct btrfs_corrupt_block *corrupt;
3568
3569                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3570                        root->root_key.objectid);
3571                 cache = first_cache_extent(&corrupt_blocks);
3572                 while (cache) {
3573                         corrupt = container_of(cache,
3574                                                struct btrfs_corrupt_block,
3575                                                cache);
3576                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3577                                cache->start, corrupt->level,
3578                                corrupt->key.objectid, corrupt->key.type,
3579                                corrupt->key.offset);
3580                         cache = next_cache_extent(cache);
3581                 }
3582                 if (repair) {
3583                         printf("Try to repair the btree for root %llu\n",
3584                                root->root_key.objectid);
3585                         ret = repair_btree(root, &corrupt_blocks);
3586                         if (ret < 0)
3587                                 fprintf(stderr, "Failed to repair btree: %s\n",
3588                                         strerror(-ret));
3589                         if (!ret)
3590                                 printf("Btree for root %llu is fixed\n",
3591                                        root->root_key.objectid);
3592                 }
3593         }
3594
3595         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3596         if (err < 0)
3597                 ret = err;
3598
3599         if (root_node.current) {
3600                 root_node.current->checked = 1;
3601                 maybe_free_inode_rec(&root_node.inode_cache,
3602                                 root_node.current);
3603         }
3604
3605         err = check_inode_recs(root, &root_node.inode_cache);
3606         if (!ret)
3607                 ret = err;
3608
3609         free_corrupt_blocks_tree(&corrupt_blocks);
3610         root->fs_info->corrupt_blocks = NULL;
3611         free_orphan_data_extents(&root->orphan_data_extents);
3612         return ret;
3613 }
3614
3615 static int fs_root_objectid(u64 objectid)
3616 {
3617         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3618             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3619                 return 1;
3620         return is_fstree(objectid);
3621 }
3622
3623 static int check_fs_roots(struct btrfs_root *root,
3624                           struct cache_tree *root_cache)
3625 {
3626         struct btrfs_path path;
3627         struct btrfs_key key;
3628         struct walk_control wc;
3629         struct extent_buffer *leaf, *tree_node;
3630         struct btrfs_root *tmp_root;
3631         struct btrfs_root *tree_root = root->fs_info->tree_root;
3632         int ret;
3633         int err = 0;
3634
3635         if (ctx.progress_enabled) {
3636                 ctx.tp = TASK_FS_ROOTS;
3637                 task_start(ctx.info);
3638         }
3639
3640         /*
3641          * Just in case we made any changes to the extent tree that weren't
3642          * reflected into the free space cache yet.
3643          */
3644         if (repair)
3645                 reset_cached_block_groups(root->fs_info);
3646         memset(&wc, 0, sizeof(wc));
3647         cache_tree_init(&wc.shared);
3648         btrfs_init_path(&path);
3649
3650 again:
3651         key.offset = 0;
3652         key.objectid = 0;
3653         key.type = BTRFS_ROOT_ITEM_KEY;
3654         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3655         if (ret < 0) {
3656                 err = 1;
3657                 goto out;
3658         }
3659         tree_node = tree_root->node;
3660         while (1) {
3661                 if (tree_node != tree_root->node) {
3662                         free_root_recs_tree(root_cache);
3663                         btrfs_release_path(&path);
3664                         goto again;
3665                 }
3666                 leaf = path.nodes[0];
3667                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3668                         ret = btrfs_next_leaf(tree_root, &path);
3669                         if (ret) {
3670                                 if (ret < 0)
3671                                         err = 1;
3672                                 break;
3673                         }
3674                         leaf = path.nodes[0];
3675                 }
3676                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3677                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3678                     fs_root_objectid(key.objectid)) {
3679                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3680                                 tmp_root = btrfs_read_fs_root_no_cache(
3681                                                 root->fs_info, &key);
3682                         } else {
3683                                 key.offset = (u64)-1;
3684                                 tmp_root = btrfs_read_fs_root(
3685                                                 root->fs_info, &key);
3686                         }
3687                         if (IS_ERR(tmp_root)) {
3688                                 err = 1;
3689                                 goto next;
3690                         }
3691                         ret = check_fs_root(tmp_root, root_cache, &wc);
3692                         if (ret == -EAGAIN) {
3693                                 free_root_recs_tree(root_cache);
3694                                 btrfs_release_path(&path);
3695                                 goto again;
3696                         }
3697                         if (ret)
3698                                 err = 1;
3699                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3700                                 btrfs_free_fs_root(tmp_root);
3701                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3702                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3703                         process_root_ref(leaf, path.slots[0], &key,
3704                                          root_cache);
3705                 }
3706 next:
3707                 path.slots[0]++;
3708         }
3709 out:
3710         btrfs_release_path(&path);
3711         if (err)
3712                 free_extent_cache_tree(&wc.shared);
3713         if (!cache_tree_empty(&wc.shared))
3714                 fprintf(stderr, "warning line %d\n", __LINE__);
3715
3716         task_stop(ctx.info);
3717
3718         return err;
3719 }
3720
3721 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3722 {
3723         struct list_head *cur = rec->backrefs.next;
3724         struct extent_backref *back;
3725         struct tree_backref *tback;
3726         struct data_backref *dback;
3727         u64 found = 0;
3728         int err = 0;
3729
3730         while(cur != &rec->backrefs) {
3731                 back = list_entry(cur, struct extent_backref, list);
3732                 cur = cur->next;
3733                 if (!back->found_extent_tree) {
3734                         err = 1;
3735                         if (!print_errs)
3736                                 goto out;
3737                         if (back->is_data) {
3738                                 dback = (struct data_backref *)back;
3739                                 fprintf(stderr, "Backref %llu %s %llu"
3740                                         " owner %llu offset %llu num_refs %lu"
3741                                         " not found in extent tree\n",
3742                                         (unsigned long long)rec->start,
3743                                         back->full_backref ?
3744                                         "parent" : "root",
3745                                         back->full_backref ?
3746                                         (unsigned long long)dback->parent:
3747                                         (unsigned long long)dback->root,
3748                                         (unsigned long long)dback->owner,
3749                                         (unsigned long long)dback->offset,
3750                                         (unsigned long)dback->num_refs);
3751                         } else {
3752                                 tback = (struct tree_backref *)back;
3753                                 fprintf(stderr, "Backref %llu parent %llu"
3754                                         " root %llu not found in extent tree\n",
3755                                         (unsigned long long)rec->start,
3756                                         (unsigned long long)tback->parent,
3757                                         (unsigned long long)tback->root);
3758                         }
3759                 }
3760                 if (!back->is_data && !back->found_ref) {
3761                         err = 1;
3762                         if (!print_errs)
3763                                 goto out;
3764                         tback = (struct tree_backref *)back;
3765                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3766                                 (unsigned long long)rec->start,
3767                                 back->full_backref ? "parent" : "root",
3768                                 back->full_backref ?
3769                                 (unsigned long long)tback->parent :
3770                                 (unsigned long long)tback->root, back);
3771                 }
3772                 if (back->is_data) {
3773                         dback = (struct data_backref *)back;
3774                         if (dback->found_ref != dback->num_refs) {
3775                                 err = 1;
3776                                 if (!print_errs)
3777                                         goto out;
3778                                 fprintf(stderr, "Incorrect local backref count"
3779                                         " on %llu %s %llu owner %llu"
3780                                         " offset %llu found %u wanted %u back %p\n",
3781                                         (unsigned long long)rec->start,
3782                                         back->full_backref ?
3783                                         "parent" : "root",
3784                                         back->full_backref ?
3785                                         (unsigned long long)dback->parent:
3786                                         (unsigned long long)dback->root,
3787                                         (unsigned long long)dback->owner,
3788                                         (unsigned long long)dback->offset,
3789                                         dback->found_ref, dback->num_refs, back);
3790                         }
3791                         if (dback->disk_bytenr != rec->start) {
3792                                 err = 1;
3793                                 if (!print_errs)
3794                                         goto out;
3795                                 fprintf(stderr, "Backref disk bytenr does not"
3796                                         " match extent record, bytenr=%llu, "
3797                                         "ref bytenr=%llu\n",
3798                                         (unsigned long long)rec->start,
3799                                         (unsigned long long)dback->disk_bytenr);
3800                         }
3801
3802                         if (dback->bytes != rec->nr) {
3803                                 err = 1;
3804                                 if (!print_errs)
3805                                         goto out;
3806                                 fprintf(stderr, "Backref bytes do not match "
3807                                         "extent backref, bytenr=%llu, ref "
3808                                         "bytes=%llu, backref bytes=%llu\n",
3809                                         (unsigned long long)rec->start,
3810                                         (unsigned long long)rec->nr,
3811                                         (unsigned long long)dback->bytes);
3812                         }
3813                 }
3814                 if (!back->is_data) {
3815                         found += 1;
3816                 } else {
3817                         dback = (struct data_backref *)back;
3818                         found += dback->found_ref;
3819                 }
3820         }
3821         if (found != rec->refs) {
3822                 err = 1;
3823                 if (!print_errs)
3824                         goto out;
3825                 fprintf(stderr, "Incorrect global backref count "
3826                         "on %llu found %llu wanted %llu\n",
3827                         (unsigned long long)rec->start,
3828                         (unsigned long long)found,
3829                         (unsigned long long)rec->refs);
3830         }
3831 out:
3832         return err;
3833 }
3834
3835 static int free_all_extent_backrefs(struct extent_record *rec)
3836 {
3837         struct extent_backref *back;
3838         struct list_head *cur;
3839         while (!list_empty(&rec->backrefs)) {
3840                 cur = rec->backrefs.next;
3841                 back = list_entry(cur, struct extent_backref, list);
3842                 list_del(cur);
3843                 free(back);
3844         }
3845         return 0;
3846 }
3847
3848 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3849                                      struct cache_tree *extent_cache)
3850 {
3851         struct cache_extent *cache;
3852         struct extent_record *rec;
3853
3854         while (1) {
3855                 cache = first_cache_extent(extent_cache);
3856                 if (!cache)
3857                         break;
3858                 rec = container_of(cache, struct extent_record, cache);
3859                 remove_cache_extent(extent_cache, cache);
3860                 free_all_extent_backrefs(rec);
3861                 free(rec);
3862         }
3863 }
3864
3865 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3866                                  struct extent_record *rec)
3867 {
3868         if (rec->content_checked && rec->owner_ref_checked &&
3869             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3870             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3871             !rec->bad_full_backref && !rec->crossing_stripes &&
3872             !rec->wrong_chunk_type) {
3873                 remove_cache_extent(extent_cache, &rec->cache);
3874                 free_all_extent_backrefs(rec);
3875                 list_del_init(&rec->list);
3876                 free(rec);
3877         }
3878         return 0;
3879 }
3880
3881 static int check_owner_ref(struct btrfs_root *root,
3882                             struct extent_record *rec,
3883                             struct extent_buffer *buf)
3884 {
3885         struct extent_backref *node;
3886         struct tree_backref *back;
3887         struct btrfs_root *ref_root;
3888         struct btrfs_key key;
3889         struct btrfs_path path;
3890         struct extent_buffer *parent;
3891         int level;
3892         int found = 0;
3893         int ret;
3894
3895         list_for_each_entry(node, &rec->backrefs, list) {
3896                 if (node->is_data)
3897                         continue;
3898                 if (!node->found_ref)
3899                         continue;
3900                 if (node->full_backref)
3901                         continue;
3902                 back = (struct tree_backref *)node;
3903                 if (btrfs_header_owner(buf) == back->root)
3904                         return 0;
3905         }
3906         BUG_ON(rec->is_root);
3907
3908         /* try to find the block by search corresponding fs tree */
3909         key.objectid = btrfs_header_owner(buf);
3910         key.type = BTRFS_ROOT_ITEM_KEY;
3911         key.offset = (u64)-1;
3912
3913         ref_root = btrfs_read_fs_root(root->fs_info, &key);
3914         if (IS_ERR(ref_root))
3915                 return 1;
3916
3917         level = btrfs_header_level(buf);
3918         if (level == 0)
3919                 btrfs_item_key_to_cpu(buf, &key, 0);
3920         else
3921                 btrfs_node_key_to_cpu(buf, &key, 0);
3922
3923         btrfs_init_path(&path);
3924         path.lowest_level = level + 1;
3925         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3926         if (ret < 0)
3927                 return 0;
3928
3929         parent = path.nodes[level + 1];
3930         if (parent && buf->start == btrfs_node_blockptr(parent,
3931                                                         path.slots[level + 1]))
3932                 found = 1;
3933
3934         btrfs_release_path(&path);
3935         return found ? 0 : 1;
3936 }
3937
3938 static int is_extent_tree_record(struct extent_record *rec)
3939 {
3940         struct list_head *cur = rec->backrefs.next;
3941         struct extent_backref *node;
3942         struct tree_backref *back;
3943         int is_extent = 0;
3944
3945         while(cur != &rec->backrefs) {
3946                 node = list_entry(cur, struct extent_backref, list);
3947                 cur = cur->next;
3948                 if (node->is_data)
3949                         return 0;
3950                 back = (struct tree_backref *)node;
3951                 if (node->full_backref)
3952                         return 0;
3953                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3954                         is_extent = 1;
3955         }
3956         return is_extent;
3957 }
3958
3959
3960 static int record_bad_block_io(struct btrfs_fs_info *info,
3961                                struct cache_tree *extent_cache,
3962                                u64 start, u64 len)
3963 {
3964         struct extent_record *rec;
3965         struct cache_extent *cache;
3966         struct btrfs_key key;
3967
3968         cache = lookup_cache_extent(extent_cache, start, len);
3969         if (!cache)
3970                 return 0;
3971
3972         rec = container_of(cache, struct extent_record, cache);
3973         if (!is_extent_tree_record(rec))
3974                 return 0;
3975
3976         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3977         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
3978 }
3979
3980 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3981                        struct extent_buffer *buf, int slot)
3982 {
3983         if (btrfs_header_level(buf)) {
3984                 struct btrfs_key_ptr ptr1, ptr2;
3985
3986                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3987                                    sizeof(struct btrfs_key_ptr));
3988                 read_extent_buffer(buf, &ptr2,
3989                                    btrfs_node_key_ptr_offset(slot + 1),
3990                                    sizeof(struct btrfs_key_ptr));
3991                 write_extent_buffer(buf, &ptr1,
3992                                     btrfs_node_key_ptr_offset(slot + 1),
3993                                     sizeof(struct btrfs_key_ptr));
3994                 write_extent_buffer(buf, &ptr2,
3995                                     btrfs_node_key_ptr_offset(slot),
3996                                     sizeof(struct btrfs_key_ptr));
3997                 if (slot == 0) {
3998                         struct btrfs_disk_key key;
3999                         btrfs_node_key(buf, &key, 0);
4000                         btrfs_fixup_low_keys(root, path, &key,
4001                                              btrfs_header_level(buf) + 1);
4002                 }
4003         } else {
4004                 struct btrfs_item *item1, *item2;
4005                 struct btrfs_key k1, k2;
4006                 char *item1_data, *item2_data;
4007                 u32 item1_offset, item2_offset, item1_size, item2_size;
4008
4009                 item1 = btrfs_item_nr(slot);
4010                 item2 = btrfs_item_nr(slot + 1);
4011                 btrfs_item_key_to_cpu(buf, &k1, slot);
4012                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4013                 item1_offset = btrfs_item_offset(buf, item1);
4014                 item2_offset = btrfs_item_offset(buf, item2);
4015                 item1_size = btrfs_item_size(buf, item1);
4016                 item2_size = btrfs_item_size(buf, item2);
4017
4018                 item1_data = malloc(item1_size);
4019                 if (!item1_data)
4020                         return -ENOMEM;
4021                 item2_data = malloc(item2_size);
4022                 if (!item2_data) {
4023                         free(item1_data);
4024                         return -ENOMEM;
4025                 }
4026
4027                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4028                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4029
4030                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4031                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4032                 free(item1_data);
4033                 free(item2_data);
4034
4035                 btrfs_set_item_offset(buf, item1, item2_offset);
4036                 btrfs_set_item_offset(buf, item2, item1_offset);
4037                 btrfs_set_item_size(buf, item1, item2_size);
4038                 btrfs_set_item_size(buf, item2, item1_size);
4039
4040                 path->slots[0] = slot;
4041                 btrfs_set_item_key_unsafe(root, path, &k2);
4042                 path->slots[0] = slot + 1;
4043                 btrfs_set_item_key_unsafe(root, path, &k1);
4044         }
4045         return 0;
4046 }
4047
4048 static int fix_key_order(struct btrfs_trans_handle *trans,
4049                          struct btrfs_root *root,
4050                          struct btrfs_path *path)
4051 {
4052         struct extent_buffer *buf;
4053         struct btrfs_key k1, k2;
4054         int i;
4055         int level = path->lowest_level;
4056         int ret = -EIO;
4057
4058         buf = path->nodes[level];
4059         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4060                 if (level) {
4061                         btrfs_node_key_to_cpu(buf, &k1, i);
4062                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4063                 } else {
4064                         btrfs_item_key_to_cpu(buf, &k1, i);
4065                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4066                 }
4067                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4068                         continue;
4069                 ret = swap_values(root, path, buf, i);
4070                 if (ret)
4071                         break;
4072                 btrfs_mark_buffer_dirty(buf);
4073                 i = 0;
4074         }
4075         return ret;
4076 }
4077
4078 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4079                              struct btrfs_root *root,
4080                              struct btrfs_path *path,
4081                              struct extent_buffer *buf, int slot)
4082 {
4083         struct btrfs_key key;
4084         int nritems = btrfs_header_nritems(buf);
4085
4086         btrfs_item_key_to_cpu(buf, &key, slot);
4087
4088         /* These are all the keys we can deal with missing. */
4089         if (key.type != BTRFS_DIR_INDEX_KEY &&
4090             key.type != BTRFS_EXTENT_ITEM_KEY &&
4091             key.type != BTRFS_METADATA_ITEM_KEY &&
4092             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4093             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4094                 return -1;
4095
4096         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4097                (unsigned long long)key.objectid, key.type,
4098                (unsigned long long)key.offset, slot, buf->start);
4099         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4100                               btrfs_item_nr_offset(slot + 1),
4101                               sizeof(struct btrfs_item) *
4102                               (nritems - slot - 1));
4103         btrfs_set_header_nritems(buf, nritems - 1);
4104         if (slot == 0) {
4105                 struct btrfs_disk_key disk_key;
4106
4107                 btrfs_item_key(buf, &disk_key, 0);
4108                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4109         }
4110         btrfs_mark_buffer_dirty(buf);
4111         return 0;
4112 }
4113
4114 static int fix_item_offset(struct btrfs_trans_handle *trans,
4115                            struct btrfs_root *root,
4116                            struct btrfs_path *path)
4117 {
4118         struct extent_buffer *buf;
4119         int i;
4120         int ret = 0;
4121
4122         /* We should only get this for leaves */
4123         BUG_ON(path->lowest_level);
4124         buf = path->nodes[0];
4125 again:
4126         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4127                 unsigned int shift = 0, offset;
4128
4129                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4130                     BTRFS_LEAF_DATA_SIZE(root)) {
4131                         if (btrfs_item_end_nr(buf, i) >
4132                             BTRFS_LEAF_DATA_SIZE(root)) {
4133                                 ret = delete_bogus_item(trans, root, path,
4134                                                         buf, i);
4135                                 if (!ret)
4136                                         goto again;
4137                                 fprintf(stderr, "item is off the end of the "
4138                                         "leaf, can't fix\n");
4139                                 ret = -EIO;
4140                                 break;
4141                         }
4142                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4143                                 btrfs_item_end_nr(buf, i);
4144                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4145                            btrfs_item_offset_nr(buf, i - 1)) {
4146                         if (btrfs_item_end_nr(buf, i) >
4147                             btrfs_item_offset_nr(buf, i - 1)) {
4148                                 ret = delete_bogus_item(trans, root, path,
4149                                                         buf, i);
4150                                 if (!ret)
4151                                         goto again;
4152                                 fprintf(stderr, "items overlap, can't fix\n");
4153                                 ret = -EIO;
4154                                 break;
4155                         }
4156                         shift = btrfs_item_offset_nr(buf, i - 1) -
4157                                 btrfs_item_end_nr(buf, i);
4158                 }
4159                 if (!shift)
4160                         continue;
4161
4162                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4163                        i, shift, (unsigned long long)buf->start);
4164                 offset = btrfs_item_offset_nr(buf, i);
4165                 memmove_extent_buffer(buf,
4166                                       btrfs_leaf_data(buf) + offset + shift,
4167                                       btrfs_leaf_data(buf) + offset,
4168                                       btrfs_item_size_nr(buf, i));
4169                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4170                                       offset + shift);
4171                 btrfs_mark_buffer_dirty(buf);
4172         }
4173
4174         /*
4175          * We may have moved things, in which case we want to exit so we don't
4176          * write those changes out.  Once we have proper abort functionality in
4177          * progs this can be changed to something nicer.
4178          */
4179         BUG_ON(ret);
4180         return ret;
4181 }
4182
4183 /*
4184  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4185  * then just return -EIO.
4186  */
4187 static int try_to_fix_bad_block(struct btrfs_root *root,
4188                                 struct extent_buffer *buf,
4189                                 enum btrfs_tree_block_status status)
4190 {
4191         struct btrfs_trans_handle *trans;
4192         struct ulist *roots;
4193         struct ulist_node *node;
4194         struct btrfs_root *search_root;
4195         struct btrfs_path *path;
4196         struct ulist_iterator iter;
4197         struct btrfs_key root_key, key;
4198         int ret;
4199
4200         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4201             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4202                 return -EIO;
4203
4204         path = btrfs_alloc_path();
4205         if (!path)
4206                 return -EIO;
4207
4208         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4209                                    0, &roots);
4210         if (ret) {
4211                 btrfs_free_path(path);
4212                 return -EIO;
4213         }
4214
4215         ULIST_ITER_INIT(&iter);
4216         while ((node = ulist_next(roots, &iter))) {
4217                 root_key.objectid = node->val;
4218                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4219                 root_key.offset = (u64)-1;
4220
4221                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4222                 if (IS_ERR(root)) {
4223                         ret = -EIO;
4224                         break;
4225                 }
4226
4227
4228                 trans = btrfs_start_transaction(search_root, 0);
4229                 if (IS_ERR(trans)) {
4230                         ret = PTR_ERR(trans);
4231                         break;
4232                 }
4233
4234                 path->lowest_level = btrfs_header_level(buf);
4235                 path->skip_check_block = 1;
4236                 if (path->lowest_level)
4237                         btrfs_node_key_to_cpu(buf, &key, 0);
4238                 else
4239                         btrfs_item_key_to_cpu(buf, &key, 0);
4240                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4241                 if (ret) {
4242                         ret = -EIO;
4243                         btrfs_commit_transaction(trans, search_root);
4244                         break;
4245                 }
4246                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4247                         ret = fix_key_order(trans, search_root, path);
4248                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4249                         ret = fix_item_offset(trans, search_root, path);
4250                 if (ret) {
4251                         btrfs_commit_transaction(trans, search_root);
4252                         break;
4253                 }
4254                 btrfs_release_path(path);
4255                 btrfs_commit_transaction(trans, search_root);
4256         }
4257         ulist_free(roots);
4258         btrfs_free_path(path);
4259         return ret;
4260 }
4261
4262 static int check_block(struct btrfs_root *root,
4263                        struct cache_tree *extent_cache,
4264                        struct extent_buffer *buf, u64 flags)
4265 {
4266         struct extent_record *rec;
4267         struct cache_extent *cache;
4268         struct btrfs_key key;
4269         enum btrfs_tree_block_status status;
4270         int ret = 0;
4271         int level;
4272
4273         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4274         if (!cache)
4275                 return 1;
4276         rec = container_of(cache, struct extent_record, cache);
4277         rec->generation = btrfs_header_generation(buf);
4278
4279         level = btrfs_header_level(buf);
4280         if (btrfs_header_nritems(buf) > 0) {
4281
4282                 if (level == 0)
4283                         btrfs_item_key_to_cpu(buf, &key, 0);
4284                 else
4285                         btrfs_node_key_to_cpu(buf, &key, 0);
4286
4287                 rec->info_objectid = key.objectid;
4288         }
4289         rec->info_level = level;
4290
4291         if (btrfs_is_leaf(buf))
4292                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4293         else
4294                 status = btrfs_check_node(root, &rec->parent_key, buf);
4295
4296         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4297                 if (repair)
4298                         status = try_to_fix_bad_block(root, buf, status);
4299                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4300                         ret = -EIO;
4301                         fprintf(stderr, "bad block %llu\n",
4302                                 (unsigned long long)buf->start);
4303                 } else {
4304                         /*
4305                          * Signal to callers we need to start the scan over
4306                          * again since we'll have cow'ed blocks.
4307                          */
4308                         ret = -EAGAIN;
4309                 }
4310         } else {
4311                 rec->content_checked = 1;
4312                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4313                         rec->owner_ref_checked = 1;
4314                 else {
4315                         ret = check_owner_ref(root, rec, buf);
4316                         if (!ret)
4317                                 rec->owner_ref_checked = 1;
4318                 }
4319         }
4320         if (!ret)
4321                 maybe_free_extent_rec(extent_cache, rec);
4322         return ret;
4323 }
4324
4325 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4326                                                 u64 parent, u64 root)
4327 {
4328         struct list_head *cur = rec->backrefs.next;
4329         struct extent_backref *node;
4330         struct tree_backref *back;
4331
4332         while(cur != &rec->backrefs) {
4333                 node = list_entry(cur, struct extent_backref, list);
4334                 cur = cur->next;
4335                 if (node->is_data)
4336                         continue;
4337                 back = (struct tree_backref *)node;
4338                 if (parent > 0) {
4339                         if (!node->full_backref)
4340                                 continue;
4341                         if (parent == back->parent)
4342                                 return back;
4343                 } else {
4344                         if (node->full_backref)
4345                                 continue;
4346                         if (back->root == root)
4347                                 return back;
4348                 }
4349         }
4350         return NULL;
4351 }
4352
4353 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4354                                                 u64 parent, u64 root)
4355 {
4356         struct tree_backref *ref = malloc(sizeof(*ref));
4357
4358         if (!ref)
4359                 return NULL;
4360         memset(&ref->node, 0, sizeof(ref->node));
4361         if (parent > 0) {
4362                 ref->parent = parent;
4363                 ref->node.full_backref = 1;
4364         } else {
4365                 ref->root = root;
4366                 ref->node.full_backref = 0;
4367         }
4368         list_add_tail(&ref->node.list, &rec->backrefs);
4369
4370         return ref;
4371 }
4372
4373 static struct data_backref *find_data_backref(struct extent_record *rec,
4374                                                 u64 parent, u64 root,
4375                                                 u64 owner, u64 offset,
4376                                                 int found_ref,
4377                                                 u64 disk_bytenr, u64 bytes)
4378 {
4379         struct list_head *cur = rec->backrefs.next;
4380         struct extent_backref *node;
4381         struct data_backref *back;
4382
4383         while(cur != &rec->backrefs) {
4384                 node = list_entry(cur, struct extent_backref, list);
4385                 cur = cur->next;
4386                 if (!node->is_data)
4387                         continue;
4388                 back = (struct data_backref *)node;
4389                 if (parent > 0) {
4390                         if (!node->full_backref)
4391                                 continue;
4392                         if (parent == back->parent)
4393                                 return back;
4394                 } else {
4395                         if (node->full_backref)
4396                                 continue;
4397                         if (back->root == root && back->owner == owner &&
4398                             back->offset == offset) {
4399                                 if (found_ref && node->found_ref &&
4400                                     (back->bytes != bytes ||
4401                                     back->disk_bytenr != disk_bytenr))
4402                                         continue;
4403                                 return back;
4404                         }
4405                 }
4406         }
4407         return NULL;
4408 }
4409
4410 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4411                                                 u64 parent, u64 root,
4412                                                 u64 owner, u64 offset,
4413                                                 u64 max_size)
4414 {
4415         struct data_backref *ref = malloc(sizeof(*ref));
4416         memset(&ref->node, 0, sizeof(ref->node));
4417         ref->node.is_data = 1;
4418
4419         if (parent > 0) {
4420                 ref->parent = parent;
4421                 ref->owner = 0;
4422                 ref->offset = 0;
4423                 ref->node.full_backref = 1;
4424         } else {
4425                 ref->root = root;
4426                 ref->owner = owner;
4427                 ref->offset = offset;
4428                 ref->node.full_backref = 0;
4429         }
4430         ref->bytes = max_size;
4431         ref->found_ref = 0;
4432         ref->num_refs = 0;
4433         list_add_tail(&ref->node.list, &rec->backrefs);
4434         if (max_size > rec->max_size)
4435                 rec->max_size = max_size;
4436         return ref;
4437 }
4438
4439 /* Check if the type of extent matches with its chunk */
4440 static void check_extent_type(struct extent_record *rec)
4441 {
4442         struct btrfs_block_group_cache *bg_cache;
4443
4444         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4445         if (!bg_cache)
4446                 return;
4447
4448         /* data extent, check chunk directly*/
4449         if (!rec->metadata) {
4450                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4451                         rec->wrong_chunk_type = 1;
4452                 return;
4453         }
4454
4455         /* metadata extent, check the obvious case first */
4456         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4457                                  BTRFS_BLOCK_GROUP_METADATA))) {
4458                 rec->wrong_chunk_type = 1;
4459                 return;
4460         }
4461
4462         /*
4463          * Check SYSTEM extent, as it's also marked as metadata, we can only
4464          * make sure it's a SYSTEM extent by its backref
4465          */
4466         if (!list_empty(&rec->backrefs)) {
4467                 struct extent_backref *node;
4468                 struct tree_backref *tback;
4469                 u64 bg_type;
4470
4471                 node = list_entry(rec->backrefs.next, struct extent_backref,
4472                                   list);
4473                 if (node->is_data) {
4474                         /* tree block shouldn't have data backref */
4475                         rec->wrong_chunk_type = 1;
4476                         return;
4477                 }
4478                 tback = container_of(node, struct tree_backref, node);
4479
4480                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4481                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4482                 else
4483                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4484                 if (!(bg_cache->flags & bg_type))
4485                         rec->wrong_chunk_type = 1;
4486         }
4487 }
4488
4489 static int add_extent_rec(struct cache_tree *extent_cache,
4490                           struct btrfs_key *parent_key, u64 parent_gen,
4491                           u64 start, u64 nr, u64 extent_item_refs,
4492                           int is_root, int inc_ref, int set_checked,
4493                           int metadata, int extent_rec, u64 max_size)
4494 {
4495         struct extent_record *rec;
4496         struct cache_extent *cache;
4497         int ret = 0;
4498         int dup = 0;
4499
4500         cache = lookup_cache_extent(extent_cache, start, nr);
4501         if (cache) {
4502                 rec = container_of(cache, struct extent_record, cache);
4503                 if (inc_ref)
4504                         rec->refs++;
4505                 if (rec->nr == 1)
4506                         rec->nr = max(nr, max_size);
4507
4508                 /*
4509                  * We need to make sure to reset nr to whatever the extent
4510                  * record says was the real size, this way we can compare it to
4511                  * the backrefs.
4512                  */
4513                 if (extent_rec) {
4514                         if (start != rec->start || rec->found_rec) {
4515                                 struct extent_record *tmp;
4516
4517                                 dup = 1;
4518                                 if (list_empty(&rec->list))
4519                                         list_add_tail(&rec->list,
4520                                                       &duplicate_extents);
4521
4522                                 /*
4523                                  * We have to do this song and dance in case we
4524                                  * find an extent record that falls inside of
4525                                  * our current extent record but does not have
4526                                  * the same objectid.
4527                                  */
4528                                 tmp = malloc(sizeof(*tmp));
4529                                 if (!tmp)
4530                                         return -ENOMEM;
4531                                 tmp->start = start;
4532                                 tmp->max_size = max_size;
4533                                 tmp->nr = nr;
4534                                 tmp->found_rec = 1;
4535                                 tmp->metadata = metadata;
4536                                 tmp->extent_item_refs = extent_item_refs;
4537                                 INIT_LIST_HEAD(&tmp->list);
4538                                 list_add_tail(&tmp->list, &rec->dups);
4539                                 rec->num_duplicates++;
4540                         } else {
4541                                 rec->nr = nr;
4542                                 rec->found_rec = 1;
4543                         }
4544                 }
4545
4546                 if (extent_item_refs && !dup) {
4547                         if (rec->extent_item_refs) {
4548                                 fprintf(stderr, "block %llu rec "
4549                                         "extent_item_refs %llu, passed %llu\n",
4550                                         (unsigned long long)start,
4551                                         (unsigned long long)
4552                                                         rec->extent_item_refs,
4553                                         (unsigned long long)extent_item_refs);
4554                         }
4555                         rec->extent_item_refs = extent_item_refs;
4556                 }
4557                 if (is_root)
4558                         rec->is_root = 1;
4559                 if (set_checked) {
4560                         rec->content_checked = 1;
4561                         rec->owner_ref_checked = 1;
4562                 }
4563
4564                 if (parent_key)
4565                         btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4566                 if (parent_gen)
4567                         rec->parent_generation = parent_gen;
4568
4569                 if (rec->max_size < max_size)
4570                         rec->max_size = max_size;
4571
4572                 /*
4573                  * A metadata extent can't cross stripe_len boundary, otherwise
4574                  * kernel scrub won't be able to handle it.
4575                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4576                  * it.
4577                  */
4578                 if (metadata && check_crossing_stripes(rec->start,
4579                                                        rec->max_size))
4580                                 rec->crossing_stripes = 1;
4581                 check_extent_type(rec);
4582                 maybe_free_extent_rec(extent_cache, rec);
4583                 return ret;
4584         }
4585         rec = malloc(sizeof(*rec));
4586         rec->start = start;
4587         rec->max_size = max_size;
4588         rec->nr = max(nr, max_size);
4589         rec->found_rec = !!extent_rec;
4590         rec->content_checked = 0;
4591         rec->owner_ref_checked = 0;
4592         rec->num_duplicates = 0;
4593         rec->metadata = metadata;
4594         rec->flag_block_full_backref = -1;
4595         rec->bad_full_backref = 0;
4596         rec->crossing_stripes = 0;
4597         rec->wrong_chunk_type = 0;
4598         INIT_LIST_HEAD(&rec->backrefs);
4599         INIT_LIST_HEAD(&rec->dups);
4600         INIT_LIST_HEAD(&rec->list);
4601
4602         if (is_root)
4603                 rec->is_root = 1;
4604         else
4605                 rec->is_root = 0;
4606
4607         if (inc_ref)
4608                 rec->refs = 1;
4609         else
4610                 rec->refs = 0;
4611
4612         if (extent_item_refs)
4613                 rec->extent_item_refs = extent_item_refs;
4614         else
4615                 rec->extent_item_refs = 0;
4616
4617         if (parent_key)
4618                 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4619         else
4620                 memset(&rec->parent_key, 0, sizeof(*parent_key));
4621
4622         if (parent_gen)
4623                 rec->parent_generation = parent_gen;
4624         else
4625                 rec->parent_generation = 0;
4626
4627         rec->cache.start = start;
4628         rec->cache.size = nr;
4629         ret = insert_cache_extent(extent_cache, &rec->cache);
4630         BUG_ON(ret);
4631         bytes_used += nr;
4632         if (set_checked) {
4633                 rec->content_checked = 1;
4634                 rec->owner_ref_checked = 1;
4635         }
4636
4637         if (metadata)
4638                 if (check_crossing_stripes(rec->start, rec->max_size))
4639                         rec->crossing_stripes = 1;
4640         check_extent_type(rec);
4641         return ret;
4642 }
4643
4644 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4645                             u64 parent, u64 root, int found_ref)
4646 {
4647         struct extent_record *rec;
4648         struct tree_backref *back;
4649         struct cache_extent *cache;
4650
4651         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4652         if (!cache) {
4653                 add_extent_rec(extent_cache, NULL, 0, bytenr,
4654                                1, 0, 0, 0, 0, 1, 0, 0);
4655                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4656                 if (!cache)
4657                         abort();
4658         }
4659
4660         rec = container_of(cache, struct extent_record, cache);
4661         if (rec->start != bytenr) {
4662                 abort();
4663         }
4664
4665         back = find_tree_backref(rec, parent, root);
4666         if (!back) {
4667                 back = alloc_tree_backref(rec, parent, root);
4668                 BUG_ON(!back);
4669         }
4670
4671         if (found_ref) {
4672                 if (back->node.found_ref) {
4673                         fprintf(stderr, "Extent back ref already exists "
4674                                 "for %llu parent %llu root %llu \n",
4675                                 (unsigned long long)bytenr,
4676                                 (unsigned long long)parent,
4677                                 (unsigned long long)root);
4678                 }
4679                 back->node.found_ref = 1;
4680         } else {
4681                 if (back->node.found_extent_tree) {
4682                         fprintf(stderr, "Extent back ref already exists "
4683                                 "for %llu parent %llu root %llu \n",
4684                                 (unsigned long long)bytenr,
4685                                 (unsigned long long)parent,
4686                                 (unsigned long long)root);
4687                 }
4688                 back->node.found_extent_tree = 1;
4689         }
4690         check_extent_type(rec);
4691         maybe_free_extent_rec(extent_cache, rec);
4692         return 0;
4693 }
4694
4695 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4696                             u64 parent, u64 root, u64 owner, u64 offset,
4697                             u32 num_refs, int found_ref, u64 max_size)
4698 {
4699         struct extent_record *rec;
4700         struct data_backref *back;
4701         struct cache_extent *cache;
4702
4703         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4704         if (!cache) {
4705                 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
4706                                0, 0, max_size);
4707                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4708                 if (!cache)
4709                         abort();
4710         }
4711
4712         rec = container_of(cache, struct extent_record, cache);
4713         if (rec->max_size < max_size)
4714                 rec->max_size = max_size;
4715
4716         /*
4717          * If found_ref is set then max_size is the real size and must match the
4718          * existing refs.  So if we have already found a ref then we need to
4719          * make sure that this ref matches the existing one, otherwise we need
4720          * to add a new backref so we can notice that the backrefs don't match
4721          * and we need to figure out who is telling the truth.  This is to
4722          * account for that awful fsync bug I introduced where we'd end up with
4723          * a btrfs_file_extent_item that would have its length include multiple
4724          * prealloc extents or point inside of a prealloc extent.
4725          */
4726         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4727                                  bytenr, max_size);
4728         if (!back)
4729                 back = alloc_data_backref(rec, parent, root, owner, offset,
4730                                           max_size);
4731
4732         if (found_ref) {
4733                 BUG_ON(num_refs != 1);
4734                 if (back->node.found_ref)
4735                         BUG_ON(back->bytes != max_size);
4736                 back->node.found_ref = 1;
4737                 back->found_ref += 1;
4738                 back->bytes = max_size;
4739                 back->disk_bytenr = bytenr;
4740                 rec->refs += 1;
4741                 rec->content_checked = 1;
4742                 rec->owner_ref_checked = 1;
4743         } else {
4744                 if (back->node.found_extent_tree) {
4745                         fprintf(stderr, "Extent back ref already exists "
4746                                 "for %llu parent %llu root %llu "
4747                                 "owner %llu offset %llu num_refs %lu\n",
4748                                 (unsigned long long)bytenr,
4749                                 (unsigned long long)parent,
4750                                 (unsigned long long)root,
4751                                 (unsigned long long)owner,
4752                                 (unsigned long long)offset,
4753                                 (unsigned long)num_refs);
4754                 }
4755                 back->num_refs = num_refs;
4756                 back->node.found_extent_tree = 1;
4757         }
4758         maybe_free_extent_rec(extent_cache, rec);
4759         return 0;
4760 }
4761
4762 static int add_pending(struct cache_tree *pending,
4763                        struct cache_tree *seen, u64 bytenr, u32 size)
4764 {
4765         int ret;
4766         ret = add_cache_extent(seen, bytenr, size);
4767         if (ret)
4768                 return ret;
4769         add_cache_extent(pending, bytenr, size);
4770         return 0;
4771 }
4772
4773 static int pick_next_pending(struct cache_tree *pending,
4774                         struct cache_tree *reada,
4775                         struct cache_tree *nodes,
4776                         u64 last, struct block_info *bits, int bits_nr,
4777                         int *reada_bits)
4778 {
4779         unsigned long node_start = last;
4780         struct cache_extent *cache;
4781         int ret;
4782
4783         cache = search_cache_extent(reada, 0);
4784         if (cache) {
4785                 bits[0].start = cache->start;
4786                 bits[0].size = cache->size;
4787                 *reada_bits = 1;
4788                 return 1;
4789         }
4790         *reada_bits = 0;
4791         if (node_start > 32768)
4792                 node_start -= 32768;
4793
4794         cache = search_cache_extent(nodes, node_start);
4795         if (!cache)
4796                 cache = search_cache_extent(nodes, 0);
4797
4798         if (!cache) {
4799                  cache = search_cache_extent(pending, 0);
4800                  if (!cache)
4801                          return 0;
4802                  ret = 0;
4803                  do {
4804                          bits[ret].start = cache->start;
4805                          bits[ret].size = cache->size;
4806                          cache = next_cache_extent(cache);
4807                          ret++;
4808                  } while (cache && ret < bits_nr);
4809                  return ret;
4810         }
4811
4812         ret = 0;
4813         do {
4814                 bits[ret].start = cache->start;
4815                 bits[ret].size = cache->size;
4816                 cache = next_cache_extent(cache);
4817                 ret++;
4818         } while (cache && ret < bits_nr);
4819
4820         if (bits_nr - ret > 8) {
4821                 u64 lookup = bits[0].start + bits[0].size;
4822                 struct cache_extent *next;
4823                 next = search_cache_extent(pending, lookup);
4824                 while(next) {
4825                         if (next->start - lookup > 32768)
4826                                 break;
4827                         bits[ret].start = next->start;
4828                         bits[ret].size = next->size;
4829                         lookup = next->start + next->size;
4830                         ret++;
4831                         if (ret == bits_nr)
4832                                 break;
4833                         next = next_cache_extent(next);
4834                         if (!next)
4835                                 break;
4836                 }
4837         }
4838         return ret;
4839 }
4840
4841 static void free_chunk_record(struct cache_extent *cache)
4842 {
4843         struct chunk_record *rec;
4844
4845         rec = container_of(cache, struct chunk_record, cache);
4846         list_del_init(&rec->list);
4847         list_del_init(&rec->dextents);
4848         free(rec);
4849 }
4850
4851 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4852 {
4853         cache_tree_free_extents(chunk_cache, free_chunk_record);
4854 }
4855
4856 static void free_device_record(struct rb_node *node)
4857 {
4858         struct device_record *rec;
4859
4860         rec = container_of(node, struct device_record, node);
4861         free(rec);
4862 }
4863
4864 FREE_RB_BASED_TREE(device_cache, free_device_record);
4865
4866 int insert_block_group_record(struct block_group_tree *tree,
4867                               struct block_group_record *bg_rec)
4868 {
4869         int ret;
4870
4871         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4872         if (ret)
4873                 return ret;
4874
4875         list_add_tail(&bg_rec->list, &tree->block_groups);
4876         return 0;
4877 }
4878
4879 static void free_block_group_record(struct cache_extent *cache)
4880 {
4881         struct block_group_record *rec;
4882
4883         rec = container_of(cache, struct block_group_record, cache);
4884         list_del_init(&rec->list);
4885         free(rec);
4886 }
4887
4888 void free_block_group_tree(struct block_group_tree *tree)
4889 {
4890         cache_tree_free_extents(&tree->tree, free_block_group_record);
4891 }
4892
4893 int insert_device_extent_record(struct device_extent_tree *tree,
4894                                 struct device_extent_record *de_rec)
4895 {
4896         int ret;
4897
4898         /*
4899          * Device extent is a bit different from the other extents, because
4900          * the extents which belong to the different devices may have the
4901          * same start and size, so we need use the special extent cache
4902          * search/insert functions.
4903          */
4904         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4905         if (ret)
4906                 return ret;
4907
4908         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4909         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4910         return 0;
4911 }
4912
4913 static void free_device_extent_record(struct cache_extent *cache)
4914 {
4915         struct device_extent_record *rec;
4916
4917         rec = container_of(cache, struct device_extent_record, cache);
4918         if (!list_empty(&rec->chunk_list))
4919                 list_del_init(&rec->chunk_list);
4920         if (!list_empty(&rec->device_list))
4921                 list_del_init(&rec->device_list);
4922         free(rec);
4923 }
4924
4925 void free_device_extent_tree(struct device_extent_tree *tree)
4926 {
4927         cache_tree_free_extents(&tree->tree, free_device_extent_record);
4928 }
4929
4930 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4931 static int process_extent_ref_v0(struct cache_tree *extent_cache,
4932                                  struct extent_buffer *leaf, int slot)
4933 {
4934         struct btrfs_extent_ref_v0 *ref0;
4935         struct btrfs_key key;
4936
4937         btrfs_item_key_to_cpu(leaf, &key, slot);
4938         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
4939         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
4940                 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
4941         } else {
4942                 add_data_backref(extent_cache, key.objectid, key.offset, 0,
4943                                  0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
4944         }
4945         return 0;
4946 }
4947 #endif
4948
4949 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4950                                             struct btrfs_key *key,
4951                                             int slot)
4952 {
4953         struct btrfs_chunk *ptr;
4954         struct chunk_record *rec;
4955         int num_stripes, i;
4956
4957         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4958         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4959
4960         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4961         if (!rec) {
4962                 fprintf(stderr, "memory allocation failed\n");
4963                 exit(-1);
4964         }
4965
4966         INIT_LIST_HEAD(&rec->list);
4967         INIT_LIST_HEAD(&rec->dextents);
4968         rec->bg_rec = NULL;
4969
4970         rec->cache.start = key->offset;
4971         rec->cache.size = btrfs_chunk_length(leaf, ptr);
4972
4973         rec->generation = btrfs_header_generation(leaf);
4974
4975         rec->objectid = key->objectid;
4976         rec->type = key->type;
4977         rec->offset = key->offset;
4978
4979         rec->length = rec->cache.size;
4980         rec->owner = btrfs_chunk_owner(leaf, ptr);
4981         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4982         rec->type_flags = btrfs_chunk_type(leaf, ptr);
4983         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4984         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4985         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4986         rec->num_stripes = num_stripes;
4987         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4988
4989         for (i = 0; i < rec->num_stripes; ++i) {
4990                 rec->stripes[i].devid =
4991                         btrfs_stripe_devid_nr(leaf, ptr, i);
4992                 rec->stripes[i].offset =
4993                         btrfs_stripe_offset_nr(leaf, ptr, i);
4994                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4995                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
4996                                 BTRFS_UUID_SIZE);
4997         }
4998
4999         return rec;
5000 }
5001
5002 static int process_chunk_item(struct cache_tree *chunk_cache,
5003                               struct btrfs_key *key, struct extent_buffer *eb,
5004                               int slot)
5005 {
5006         struct chunk_record *rec;
5007         int ret = 0;
5008
5009         rec = btrfs_new_chunk_record(eb, key, slot);
5010         ret = insert_cache_extent(chunk_cache, &rec->cache);
5011         if (ret) {
5012                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5013                         rec->offset, rec->length);
5014                 free(rec);
5015         }
5016
5017         return ret;
5018 }
5019
5020 static int process_device_item(struct rb_root *dev_cache,
5021                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5022 {
5023         struct btrfs_dev_item *ptr;
5024         struct device_record *rec;
5025         int ret = 0;
5026
5027         ptr = btrfs_item_ptr(eb,
5028                 slot, struct btrfs_dev_item);
5029
5030         rec = malloc(sizeof(*rec));
5031         if (!rec) {
5032                 fprintf(stderr, "memory allocation failed\n");
5033                 return -ENOMEM;
5034         }
5035
5036         rec->devid = key->offset;
5037         rec->generation = btrfs_header_generation(eb);
5038
5039         rec->objectid = key->objectid;
5040         rec->type = key->type;
5041         rec->offset = key->offset;
5042
5043         rec->devid = btrfs_device_id(eb, ptr);
5044         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5045         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5046
5047         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5048         if (ret) {
5049                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5050                 free(rec);
5051         }
5052
5053         return ret;
5054 }
5055
5056 struct block_group_record *
5057 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5058                              int slot)
5059 {
5060         struct btrfs_block_group_item *ptr;
5061         struct block_group_record *rec;
5062
5063         rec = calloc(1, sizeof(*rec));
5064         if (!rec) {
5065                 fprintf(stderr, "memory allocation failed\n");
5066                 exit(-1);
5067         }
5068
5069         rec->cache.start = key->objectid;
5070         rec->cache.size = key->offset;
5071
5072         rec->generation = btrfs_header_generation(leaf);
5073
5074         rec->objectid = key->objectid;
5075         rec->type = key->type;
5076         rec->offset = key->offset;
5077
5078         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5079         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5080
5081         INIT_LIST_HEAD(&rec->list);
5082
5083         return rec;
5084 }
5085
5086 static int process_block_group_item(struct block_group_tree *block_group_cache,
5087                                     struct btrfs_key *key,
5088                                     struct extent_buffer *eb, int slot)
5089 {
5090         struct block_group_record *rec;
5091         int ret = 0;
5092
5093         rec = btrfs_new_block_group_record(eb, key, slot);
5094         ret = insert_block_group_record(block_group_cache, rec);
5095         if (ret) {
5096                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5097                         rec->objectid, rec->offset);
5098                 free(rec);
5099         }
5100
5101         return ret;
5102 }
5103
5104 struct device_extent_record *
5105 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5106                                struct btrfs_key *key, int slot)
5107 {
5108         struct device_extent_record *rec;
5109         struct btrfs_dev_extent *ptr;
5110
5111         rec = calloc(1, sizeof(*rec));
5112         if (!rec) {
5113                 fprintf(stderr, "memory allocation failed\n");
5114                 exit(-1);
5115         }
5116
5117         rec->cache.objectid = key->objectid;
5118         rec->cache.start = key->offset;
5119
5120         rec->generation = btrfs_header_generation(leaf);
5121
5122         rec->objectid = key->objectid;
5123         rec->type = key->type;
5124         rec->offset = key->offset;
5125
5126         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5127         rec->chunk_objecteid =
5128                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5129         rec->chunk_offset =
5130                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5131         rec->length = btrfs_dev_extent_length(leaf, ptr);
5132         rec->cache.size = rec->length;
5133
5134         INIT_LIST_HEAD(&rec->chunk_list);
5135         INIT_LIST_HEAD(&rec->device_list);
5136
5137         return rec;
5138 }
5139
5140 static int
5141 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5142                            struct btrfs_key *key, struct extent_buffer *eb,
5143                            int slot)
5144 {
5145         struct device_extent_record *rec;
5146         int ret;
5147
5148         rec = btrfs_new_device_extent_record(eb, key, slot);
5149         ret = insert_device_extent_record(dev_extent_cache, rec);
5150         if (ret) {
5151                 fprintf(stderr,
5152                         "Device extent[%llu, %llu, %llu] existed.\n",
5153                         rec->objectid, rec->offset, rec->length);
5154                 free(rec);
5155         }
5156
5157         return ret;
5158 }
5159
5160 static int process_extent_item(struct btrfs_root *root,
5161                                struct cache_tree *extent_cache,
5162                                struct extent_buffer *eb, int slot)
5163 {
5164         struct btrfs_extent_item *ei;
5165         struct btrfs_extent_inline_ref *iref;
5166         struct btrfs_extent_data_ref *dref;
5167         struct btrfs_shared_data_ref *sref;
5168         struct btrfs_key key;
5169         unsigned long end;
5170         unsigned long ptr;
5171         int type;
5172         u32 item_size = btrfs_item_size_nr(eb, slot);
5173         u64 refs = 0;
5174         u64 offset;
5175         u64 num_bytes;
5176         int metadata = 0;
5177
5178         btrfs_item_key_to_cpu(eb, &key, slot);
5179
5180         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5181                 metadata = 1;
5182                 num_bytes = root->leafsize;
5183         } else {
5184                 num_bytes = key.offset;
5185         }
5186
5187         if (item_size < sizeof(*ei)) {
5188 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5189                 struct btrfs_extent_item_v0 *ei0;
5190                 BUG_ON(item_size != sizeof(*ei0));
5191                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5192                 refs = btrfs_extent_refs_v0(eb, ei0);
5193 #else
5194                 BUG();
5195 #endif
5196                 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5197                                       num_bytes, refs, 0, 0, 0, metadata, 1,
5198                                       num_bytes);
5199         }
5200
5201         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5202         refs = btrfs_extent_refs(eb, ei);
5203         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5204                 metadata = 1;
5205         else
5206                 metadata = 0;
5207
5208         add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5209                        refs, 0, 0, 0, metadata, 1, num_bytes);
5210
5211         ptr = (unsigned long)(ei + 1);
5212         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5213             key.type == BTRFS_EXTENT_ITEM_KEY)
5214                 ptr += sizeof(struct btrfs_tree_block_info);
5215
5216         end = (unsigned long)ei + item_size;
5217         while (ptr < end) {
5218                 iref = (struct btrfs_extent_inline_ref *)ptr;
5219                 type = btrfs_extent_inline_ref_type(eb, iref);
5220                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5221                 switch (type) {
5222                 case BTRFS_TREE_BLOCK_REF_KEY:
5223                         add_tree_backref(extent_cache, key.objectid,
5224                                          0, offset, 0);
5225                         break;
5226                 case BTRFS_SHARED_BLOCK_REF_KEY:
5227                         add_tree_backref(extent_cache, key.objectid,
5228                                          offset, 0, 0);
5229                         break;
5230                 case BTRFS_EXTENT_DATA_REF_KEY:
5231                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5232                         add_data_backref(extent_cache, key.objectid, 0,
5233                                         btrfs_extent_data_ref_root(eb, dref),
5234                                         btrfs_extent_data_ref_objectid(eb,
5235                                                                        dref),
5236                                         btrfs_extent_data_ref_offset(eb, dref),
5237                                         btrfs_extent_data_ref_count(eb, dref),
5238                                         0, num_bytes);
5239                         break;
5240                 case BTRFS_SHARED_DATA_REF_KEY:
5241                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5242                         add_data_backref(extent_cache, key.objectid, offset,
5243                                         0, 0, 0,
5244                                         btrfs_shared_data_ref_count(eb, sref),
5245                                         0, num_bytes);
5246                         break;
5247                 default:
5248                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5249                                 key.objectid, key.type, num_bytes);
5250                         goto out;
5251                 }
5252                 ptr += btrfs_extent_inline_ref_size(type);
5253         }
5254         WARN_ON(ptr > end);
5255 out:
5256         return 0;
5257 }
5258
5259 static int check_cache_range(struct btrfs_root *root,
5260                              struct btrfs_block_group_cache *cache,
5261                              u64 offset, u64 bytes)
5262 {
5263         struct btrfs_free_space *entry;
5264         u64 *logical;
5265         u64 bytenr;
5266         int stripe_len;
5267         int i, nr, ret;
5268
5269         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5270                 bytenr = btrfs_sb_offset(i);
5271                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5272                                        cache->key.objectid, bytenr, 0,
5273                                        &logical, &nr, &stripe_len);
5274                 if (ret)
5275                         return ret;
5276
5277                 while (nr--) {
5278                         if (logical[nr] + stripe_len <= offset)
5279                                 continue;
5280                         if (offset + bytes <= logical[nr])
5281                                 continue;
5282                         if (logical[nr] == offset) {
5283                                 if (stripe_len >= bytes) {
5284                                         kfree(logical);
5285                                         return 0;
5286                                 }
5287                                 bytes -= stripe_len;
5288                                 offset += stripe_len;
5289                         } else if (logical[nr] < offset) {
5290                                 if (logical[nr] + stripe_len >=
5291                                     offset + bytes) {
5292                                         kfree(logical);
5293                                         return 0;
5294                                 }
5295                                 bytes = (offset + bytes) -
5296                                         (logical[nr] + stripe_len);
5297                                 offset = logical[nr] + stripe_len;
5298                         } else {
5299                                 /*
5300                                  * Could be tricky, the super may land in the
5301                                  * middle of the area we're checking.  First
5302                                  * check the easiest case, it's at the end.
5303                                  */
5304                                 if (logical[nr] + stripe_len >=
5305                                     bytes + offset) {
5306                                         bytes = logical[nr] - offset;
5307                                         continue;
5308                                 }
5309
5310                                 /* Check the left side */
5311                                 ret = check_cache_range(root, cache,
5312                                                         offset,
5313                                                         logical[nr] - offset);
5314                                 if (ret) {
5315                                         kfree(logical);
5316                                         return ret;
5317                                 }
5318
5319                                 /* Now we continue with the right side */
5320                                 bytes = (offset + bytes) -
5321                                         (logical[nr] + stripe_len);
5322                                 offset = logical[nr] + stripe_len;
5323                         }
5324                 }
5325
5326                 kfree(logical);
5327         }
5328
5329         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5330         if (!entry) {
5331                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5332                         offset, offset+bytes);
5333                 return -EINVAL;
5334         }
5335
5336         if (entry->offset != offset) {
5337                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5338                         entry->offset);
5339                 return -EINVAL;
5340         }
5341
5342         if (entry->bytes != bytes) {
5343                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5344                         bytes, entry->bytes, offset);
5345                 return -EINVAL;
5346         }
5347
5348         unlink_free_space(cache->free_space_ctl, entry);
5349         free(entry);
5350         return 0;
5351 }
5352
5353 static int verify_space_cache(struct btrfs_root *root,
5354                               struct btrfs_block_group_cache *cache)
5355 {
5356         struct btrfs_path *path;
5357         struct extent_buffer *leaf;
5358         struct btrfs_key key;
5359         u64 last;
5360         int ret = 0;
5361
5362         path = btrfs_alloc_path();
5363         if (!path)
5364                 return -ENOMEM;
5365
5366         root = root->fs_info->extent_root;
5367
5368         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5369
5370         key.objectid = last;
5371         key.offset = 0;
5372         key.type = BTRFS_EXTENT_ITEM_KEY;
5373
5374         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5375         if (ret < 0)
5376                 goto out;
5377         ret = 0;
5378         while (1) {
5379                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5380                         ret = btrfs_next_leaf(root, path);
5381                         if (ret < 0)
5382                                 goto out;
5383                         if (ret > 0) {
5384                                 ret = 0;
5385                                 break;
5386                         }
5387                 }
5388                 leaf = path->nodes[0];
5389                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5390                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5391                         break;
5392                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5393                     key.type != BTRFS_METADATA_ITEM_KEY) {
5394                         path->slots[0]++;
5395                         continue;
5396                 }
5397
5398                 if (last == key.objectid) {
5399                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5400                                 last = key.objectid + key.offset;
5401                         else
5402                                 last = key.objectid + root->leafsize;
5403                         path->slots[0]++;
5404                         continue;
5405                 }
5406
5407                 ret = check_cache_range(root, cache, last,
5408                                         key.objectid - last);
5409                 if (ret)
5410                         break;
5411                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5412                         last = key.objectid + key.offset;
5413                 else
5414                         last = key.objectid + root->leafsize;
5415                 path->slots[0]++;
5416         }
5417
5418         if (last < cache->key.objectid + cache->key.offset)
5419                 ret = check_cache_range(root, cache, last,
5420                                         cache->key.objectid +
5421                                         cache->key.offset - last);
5422
5423 out:
5424         btrfs_free_path(path);
5425
5426         if (!ret &&
5427             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5428                 fprintf(stderr, "There are still entries left in the space "
5429                         "cache\n");
5430                 ret = -EINVAL;
5431         }
5432
5433         return ret;
5434 }
5435
5436 static int check_space_cache(struct btrfs_root *root)
5437 {
5438         struct btrfs_block_group_cache *cache;
5439         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5440         int ret;
5441         int error = 0;
5442
5443         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5444             btrfs_super_generation(root->fs_info->super_copy) !=
5445             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5446                 printf("cache and super generation don't match, space cache "
5447                        "will be invalidated\n");
5448                 return 0;
5449         }
5450
5451         if (ctx.progress_enabled) {
5452                 ctx.tp = TASK_FREE_SPACE;
5453                 task_start(ctx.info);
5454         }
5455
5456         while (1) {
5457                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5458                 if (!cache)
5459                         break;
5460
5461                 start = cache->key.objectid + cache->key.offset;
5462                 if (!cache->free_space_ctl) {
5463                         if (btrfs_init_free_space_ctl(cache,
5464                                                       root->sectorsize)) {
5465                                 ret = -ENOMEM;
5466                                 break;
5467                         }
5468                 } else {
5469                         btrfs_remove_free_space_cache(cache);
5470                 }
5471
5472                 ret = load_free_space_cache(root->fs_info, cache);
5473                 if (!ret)
5474                         continue;
5475
5476                 ret = verify_space_cache(root, cache);
5477                 if (ret) {
5478                         fprintf(stderr, "cache appears valid but isnt %Lu\n",
5479                                 cache->key.objectid);
5480                         error++;
5481                 }
5482         }
5483
5484         task_stop(ctx.info);
5485
5486         return error ? -EINVAL : 0;
5487 }
5488
5489 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5490                         u64 num_bytes, unsigned long leaf_offset,
5491                         struct extent_buffer *eb) {
5492
5493         u64 offset = 0;
5494         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5495         char *data;
5496         unsigned long csum_offset;
5497         u32 csum;
5498         u32 csum_expected;
5499         u64 read_len;
5500         u64 data_checked = 0;
5501         u64 tmp;
5502         int ret = 0;
5503         int mirror;
5504         int num_copies;
5505
5506         if (num_bytes % root->sectorsize)
5507                 return -EINVAL;
5508
5509         data = malloc(num_bytes);
5510         if (!data)
5511                 return -ENOMEM;
5512
5513         while (offset < num_bytes) {
5514                 mirror = 0;
5515 again:
5516                 read_len = num_bytes - offset;
5517                 /* read as much space once a time */
5518                 ret = read_extent_data(root, data + offset,
5519                                 bytenr + offset, &read_len, mirror);
5520                 if (ret)
5521                         goto out;
5522                 data_checked = 0;
5523                 /* verify every 4k data's checksum */
5524                 while (data_checked < read_len) {
5525                         csum = ~(u32)0;
5526                         tmp = offset + data_checked;
5527
5528                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5529                                                csum, root->sectorsize);
5530                         btrfs_csum_final(csum, (char *)&csum);
5531
5532                         csum_offset = leaf_offset +
5533                                  tmp / root->sectorsize * csum_size;
5534                         read_extent_buffer(eb, (char *)&csum_expected,
5535                                            csum_offset, csum_size);
5536                         /* try another mirror */
5537                         if (csum != csum_expected) {
5538                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5539                                                 mirror, bytenr + tmp,
5540                                                 csum, csum_expected);
5541                                 num_copies = btrfs_num_copies(
5542                                                 &root->fs_info->mapping_tree,
5543                                                 bytenr, num_bytes);
5544                                 if (mirror < num_copies - 1) {
5545                                         mirror += 1;
5546                                         goto again;
5547                                 }
5548                         }
5549                         data_checked += root->sectorsize;
5550                 }
5551                 offset += read_len;
5552         }
5553 out:
5554         free(data);
5555         return ret;
5556 }
5557
5558 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5559                                u64 num_bytes)
5560 {
5561         struct btrfs_path *path;
5562         struct extent_buffer *leaf;
5563         struct btrfs_key key;
5564         int ret;
5565
5566         path = btrfs_alloc_path();
5567         if (!path) {
5568                 fprintf(stderr, "Error allocing path\n");
5569                 return -ENOMEM;
5570         }
5571
5572         key.objectid = bytenr;
5573         key.type = BTRFS_EXTENT_ITEM_KEY;
5574         key.offset = (u64)-1;
5575
5576 again:
5577         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5578                                 0, 0);
5579         if (ret < 0) {
5580                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5581                 btrfs_free_path(path);
5582                 return ret;
5583         } else if (ret) {
5584                 if (path->slots[0] > 0) {
5585                         path->slots[0]--;
5586                 } else {
5587                         ret = btrfs_prev_leaf(root, path);
5588                         if (ret < 0) {
5589                                 goto out;
5590                         } else if (ret > 0) {
5591                                 ret = 0;
5592                                 goto out;
5593                         }
5594                 }
5595         }
5596
5597         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5598
5599         /*
5600          * Block group items come before extent items if they have the same
5601          * bytenr, so walk back one more just in case.  Dear future traveler,
5602          * first congrats on mastering time travel.  Now if it's not too much
5603          * trouble could you go back to 2006 and tell Chris to make the
5604          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5605          * EXTENT_ITEM_KEY please?
5606          */
5607         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5608                 if (path->slots[0] > 0) {
5609                         path->slots[0]--;
5610                 } else {
5611                         ret = btrfs_prev_leaf(root, path);
5612                         if (ret < 0) {
5613                                 goto out;
5614                         } else if (ret > 0) {
5615                                 ret = 0;
5616                                 goto out;
5617                         }
5618                 }
5619                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5620         }
5621
5622         while (num_bytes) {
5623                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5624                         ret = btrfs_next_leaf(root, path);
5625                         if (ret < 0) {
5626                                 fprintf(stderr, "Error going to next leaf "
5627                                         "%d\n", ret);
5628                                 btrfs_free_path(path);
5629                                 return ret;
5630                         } else if (ret) {
5631                                 break;
5632                         }
5633                 }
5634                 leaf = path->nodes[0];
5635                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5636                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5637                         path->slots[0]++;
5638                         continue;
5639                 }
5640                 if (key.objectid + key.offset < bytenr) {
5641                         path->slots[0]++;
5642                         continue;
5643                 }
5644                 if (key.objectid > bytenr + num_bytes)
5645                         break;
5646
5647                 if (key.objectid == bytenr) {
5648                         if (key.offset >= num_bytes) {
5649                                 num_bytes = 0;
5650                                 break;
5651                         }
5652                         num_bytes -= key.offset;
5653                         bytenr += key.offset;
5654                 } else if (key.objectid < bytenr) {
5655                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5656                                 num_bytes = 0;
5657                                 break;
5658                         }
5659                         num_bytes = (bytenr + num_bytes) -
5660                                 (key.objectid + key.offset);
5661                         bytenr = key.objectid + key.offset;
5662                 } else {
5663                         if (key.objectid + key.offset < bytenr + num_bytes) {
5664                                 u64 new_start = key.objectid + key.offset;
5665                                 u64 new_bytes = bytenr + num_bytes - new_start;
5666
5667                                 /*
5668                                  * Weird case, the extent is in the middle of
5669                                  * our range, we'll have to search one side
5670                                  * and then the other.  Not sure if this happens
5671                                  * in real life, but no harm in coding it up
5672                                  * anyway just in case.
5673                                  */
5674                                 btrfs_release_path(path);
5675                                 ret = check_extent_exists(root, new_start,
5676                                                           new_bytes);
5677                                 if (ret) {
5678                                         fprintf(stderr, "Right section didn't "
5679                                                 "have a record\n");
5680                                         break;
5681                                 }
5682                                 num_bytes = key.objectid - bytenr;
5683                                 goto again;
5684                         }
5685                         num_bytes = key.objectid - bytenr;
5686                 }
5687                 path->slots[0]++;
5688         }
5689         ret = 0;
5690
5691 out:
5692         if (num_bytes && !ret) {
5693                 fprintf(stderr, "There are no extents for csum range "
5694                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5695                 ret = 1;
5696         }
5697
5698         btrfs_free_path(path);
5699         return ret;
5700 }
5701
5702 static int check_csums(struct btrfs_root *root)
5703 {
5704         struct btrfs_path *path;
5705         struct extent_buffer *leaf;
5706         struct btrfs_key key;
5707         u64 offset = 0, num_bytes = 0;
5708         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5709         int errors = 0;
5710         int ret;
5711         u64 data_len;
5712         unsigned long leaf_offset;
5713
5714         root = root->fs_info->csum_root;
5715         if (!extent_buffer_uptodate(root->node)) {
5716                 fprintf(stderr, "No valid csum tree found\n");
5717                 return -ENOENT;
5718         }
5719
5720         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5721         key.type = BTRFS_EXTENT_CSUM_KEY;
5722         key.offset = 0;
5723
5724         path = btrfs_alloc_path();
5725         if (!path)
5726                 return -ENOMEM;
5727
5728         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5729         if (ret < 0) {
5730                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5731                 btrfs_free_path(path);
5732                 return ret;
5733         }
5734
5735         if (ret > 0 && path->slots[0])
5736                 path->slots[0]--;
5737         ret = 0;
5738
5739         while (1) {
5740                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5741                         ret = btrfs_next_leaf(root, path);
5742                         if (ret < 0) {
5743                                 fprintf(stderr, "Error going to next leaf "
5744                                         "%d\n", ret);
5745                                 break;
5746                         }
5747                         if (ret)
5748                                 break;
5749                 }
5750                 leaf = path->nodes[0];
5751
5752                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5753                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5754                         path->slots[0]++;
5755                         continue;
5756                 }
5757
5758                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5759                               csum_size) * root->sectorsize;
5760                 if (!check_data_csum)
5761                         goto skip_csum_check;
5762                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5763                 ret = check_extent_csums(root, key.offset, data_len,
5764                                          leaf_offset, leaf);
5765                 if (ret)
5766                         break;
5767 skip_csum_check:
5768                 if (!num_bytes) {
5769                         offset = key.offset;
5770                 } else if (key.offset != offset + num_bytes) {
5771                         ret = check_extent_exists(root, offset, num_bytes);
5772                         if (ret) {
5773                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5774                                         "there is no extent record\n",
5775                                         offset, offset+num_bytes);
5776                                 errors++;
5777                         }
5778                         offset = key.offset;
5779                         num_bytes = 0;
5780                 }
5781                 num_bytes += data_len;
5782                 path->slots[0]++;
5783         }
5784
5785         btrfs_free_path(path);
5786         return errors;
5787 }
5788
5789 static int is_dropped_key(struct btrfs_key *key,
5790                           struct btrfs_key *drop_key) {
5791         if (key->objectid < drop_key->objectid)
5792                 return 1;
5793         else if (key->objectid == drop_key->objectid) {
5794                 if (key->type < drop_key->type)
5795                         return 1;
5796                 else if (key->type == drop_key->type) {
5797                         if (key->offset < drop_key->offset)
5798                                 return 1;
5799                 }
5800         }
5801         return 0;
5802 }
5803
5804 /*
5805  * Here are the rules for FULL_BACKREF.
5806  *
5807  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5808  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5809  *      FULL_BACKREF set.
5810  * 3) We cow'ed the block walking down a reloc tree.  This is impossible to tell
5811  *    if it happened after the relocation occurred since we'll have dropped the
5812  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5813  *    have no real way to know for sure.
5814  *
5815  * We process the blocks one root at a time, and we start from the lowest root
5816  * objectid and go to the highest.  So we can just lookup the owner backref for
5817  * the record and if we don't find it then we know it doesn't exist and we have
5818  * a FULL BACKREF.
5819  *
5820  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5821  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5822  * be set or not and then we can check later once we've gathered all the refs.
5823  */
5824 static int calc_extent_flag(struct btrfs_root *root,
5825                            struct cache_tree *extent_cache,
5826                            struct extent_buffer *buf,
5827                            struct root_item_record *ri,
5828                            u64 *flags)
5829 {
5830         struct extent_record *rec;
5831         struct cache_extent *cache;
5832         struct tree_backref *tback;
5833         u64 owner = 0;
5834
5835         cache = lookup_cache_extent(extent_cache, buf->start, 1);
5836         /* we have added this extent before */
5837         BUG_ON(!cache);
5838         rec = container_of(cache, struct extent_record, cache);
5839
5840         /*
5841          * Except file/reloc tree, we can not have
5842          * FULL BACKREF MODE
5843          */
5844         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5845                 goto normal;
5846         /*
5847          * root node
5848          */
5849         if (buf->start == ri->bytenr)
5850                 goto normal;
5851
5852         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5853                 goto full_backref;
5854
5855         owner = btrfs_header_owner(buf);
5856         if (owner == ri->objectid)
5857                 goto normal;
5858
5859         tback = find_tree_backref(rec, 0, owner);
5860         if (!tback)
5861                 goto full_backref;
5862 normal:
5863         *flags = 0;
5864         if (rec->flag_block_full_backref != -1 &&
5865             rec->flag_block_full_backref != 0)
5866                 rec->bad_full_backref = 1;
5867         return 0;
5868 full_backref:
5869         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5870         if (rec->flag_block_full_backref != -1 &&
5871             rec->flag_block_full_backref != 1)
5872                 rec->bad_full_backref = 1;
5873         return 0;
5874 }
5875
5876 static int run_next_block(struct btrfs_root *root,
5877                           struct block_info *bits,
5878                           int bits_nr,
5879                           u64 *last,
5880                           struct cache_tree *pending,
5881                           struct cache_tree *seen,
5882                           struct cache_tree *reada,
5883                           struct cache_tree *nodes,
5884                           struct cache_tree *extent_cache,
5885                           struct cache_tree *chunk_cache,
5886                           struct rb_root *dev_cache,
5887                           struct block_group_tree *block_group_cache,
5888                           struct device_extent_tree *dev_extent_cache,
5889                           struct root_item_record *ri)
5890 {
5891         struct extent_buffer *buf;
5892         struct extent_record *rec = NULL;
5893         u64 bytenr;
5894         u32 size;
5895         u64 parent;
5896         u64 owner;
5897         u64 flags;
5898         u64 ptr;
5899         u64 gen = 0;
5900         int ret = 0;
5901         int i;
5902         int nritems;
5903         struct btrfs_key key;
5904         struct cache_extent *cache;
5905         int reada_bits;
5906
5907         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5908                                     bits_nr, &reada_bits);
5909         if (nritems == 0)
5910                 return 1;
5911
5912         if (!reada_bits) {
5913                 for(i = 0; i < nritems; i++) {
5914                         ret = add_cache_extent(reada, bits[i].start,
5915                                                bits[i].size);
5916                         if (ret == -EEXIST)
5917                                 continue;
5918
5919                         /* fixme, get the parent transid */
5920                         readahead_tree_block(root, bits[i].start,
5921                                              bits[i].size, 0);
5922                 }
5923         }
5924         *last = bits[0].start;
5925         bytenr = bits[0].start;
5926         size = bits[0].size;
5927
5928         cache = lookup_cache_extent(pending, bytenr, size);
5929         if (cache) {
5930                 remove_cache_extent(pending, cache);
5931                 free(cache);
5932         }
5933         cache = lookup_cache_extent(reada, bytenr, size);
5934         if (cache) {
5935                 remove_cache_extent(reada, cache);
5936                 free(cache);
5937         }
5938         cache = lookup_cache_extent(nodes, bytenr, size);
5939         if (cache) {
5940                 remove_cache_extent(nodes, cache);
5941                 free(cache);
5942         }
5943         cache = lookup_cache_extent(extent_cache, bytenr, size);
5944         if (cache) {
5945                 rec = container_of(cache, struct extent_record, cache);
5946                 gen = rec->parent_generation;
5947         }
5948
5949         /* fixme, get the real parent transid */
5950         buf = read_tree_block(root, bytenr, size, gen);
5951         if (!extent_buffer_uptodate(buf)) {
5952                 record_bad_block_io(root->fs_info,
5953                                     extent_cache, bytenr, size);
5954                 goto out;
5955         }
5956
5957         nritems = btrfs_header_nritems(buf);
5958
5959         flags = 0;
5960         if (!init_extent_tree) {
5961                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5962                                        btrfs_header_level(buf), 1, NULL,
5963                                        &flags);
5964                 if (ret < 0) {
5965                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5966                         if (ret < 0) {
5967                                 fprintf(stderr, "Couldn't calc extent flags\n");
5968                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5969                         }
5970                 }
5971         } else {
5972                 flags = 0;
5973                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5974                 if (ret < 0) {
5975                         fprintf(stderr, "Couldn't calc extent flags\n");
5976                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5977                 }
5978         }
5979
5980         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5981                 if (ri != NULL &&
5982                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5983                     ri->objectid == btrfs_header_owner(buf)) {
5984                         /*
5985                          * Ok we got to this block from it's original owner and
5986                          * we have FULL_BACKREF set.  Relocation can leave
5987                          * converted blocks over so this is altogether possible,
5988                          * however it's not possible if the generation > the
5989                          * last snapshot, so check for this case.
5990                          */
5991                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5992                             btrfs_header_generation(buf) > ri->last_snapshot) {
5993                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5994                                 rec->bad_full_backref = 1;
5995                         }
5996                 }
5997         } else {
5998                 if (ri != NULL &&
5999                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6000                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6001                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6002                         rec->bad_full_backref = 1;
6003                 }
6004         }
6005
6006         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6007                 rec->flag_block_full_backref = 1;
6008                 parent = bytenr;
6009                 owner = 0;
6010         } else {
6011                 rec->flag_block_full_backref = 0;
6012                 parent = 0;
6013                 owner = btrfs_header_owner(buf);
6014         }
6015
6016         ret = check_block(root, extent_cache, buf, flags);
6017         if (ret)
6018                 goto out;
6019
6020         if (btrfs_is_leaf(buf)) {
6021                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6022                 for (i = 0; i < nritems; i++) {
6023                         struct btrfs_file_extent_item *fi;
6024                         btrfs_item_key_to_cpu(buf, &key, i);
6025                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6026                                 process_extent_item(root, extent_cache, buf,
6027                                                     i);
6028                                 continue;
6029                         }
6030                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6031                                 process_extent_item(root, extent_cache, buf,
6032                                                     i);
6033                                 continue;
6034                         }
6035                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6036                                 total_csum_bytes +=
6037                                         btrfs_item_size_nr(buf, i);
6038                                 continue;
6039                         }
6040                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6041                                 process_chunk_item(chunk_cache, &key, buf, i);
6042                                 continue;
6043                         }
6044                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6045                                 process_device_item(dev_cache, &key, buf, i);
6046                                 continue;
6047                         }
6048                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6049                                 process_block_group_item(block_group_cache,
6050                                         &key, buf, i);
6051                                 continue;
6052                         }
6053                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6054                                 process_device_extent_item(dev_extent_cache,
6055                                         &key, buf, i);
6056                                 continue;
6057
6058                         }
6059                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6060 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6061                                 process_extent_ref_v0(extent_cache, buf, i);
6062 #else
6063                                 BUG();
6064 #endif
6065                                 continue;
6066                         }
6067
6068                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6069                                 add_tree_backref(extent_cache, key.objectid, 0,
6070                                                  key.offset, 0);
6071                                 continue;
6072                         }
6073                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6074                                 add_tree_backref(extent_cache, key.objectid,
6075                                                  key.offset, 0, 0);
6076                                 continue;
6077                         }
6078                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6079                                 struct btrfs_extent_data_ref *ref;
6080                                 ref = btrfs_item_ptr(buf, i,
6081                                                 struct btrfs_extent_data_ref);
6082                                 add_data_backref(extent_cache,
6083                                         key.objectid, 0,
6084                                         btrfs_extent_data_ref_root(buf, ref),
6085                                         btrfs_extent_data_ref_objectid(buf,
6086                                                                        ref),
6087                                         btrfs_extent_data_ref_offset(buf, ref),
6088                                         btrfs_extent_data_ref_count(buf, ref),
6089                                         0, root->sectorsize);
6090                                 continue;
6091                         }
6092                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6093                                 struct btrfs_shared_data_ref *ref;
6094                                 ref = btrfs_item_ptr(buf, i,
6095                                                 struct btrfs_shared_data_ref);
6096                                 add_data_backref(extent_cache,
6097                                         key.objectid, key.offset, 0, 0, 0,
6098                                         btrfs_shared_data_ref_count(buf, ref),
6099                                         0, root->sectorsize);
6100                                 continue;
6101                         }
6102                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6103                                 struct bad_item *bad;
6104
6105                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6106                                         continue;
6107                                 if (!owner)
6108                                         continue;
6109                                 bad = malloc(sizeof(struct bad_item));
6110                                 if (!bad)
6111                                         continue;
6112                                 INIT_LIST_HEAD(&bad->list);
6113                                 memcpy(&bad->key, &key,
6114                                        sizeof(struct btrfs_key));
6115                                 bad->root_id = owner;
6116                                 list_add_tail(&bad->list, &delete_items);
6117                                 continue;
6118                         }
6119                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6120                                 continue;
6121                         fi = btrfs_item_ptr(buf, i,
6122                                             struct btrfs_file_extent_item);
6123                         if (btrfs_file_extent_type(buf, fi) ==
6124                             BTRFS_FILE_EXTENT_INLINE)
6125                                 continue;
6126                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6127                                 continue;
6128
6129                         data_bytes_allocated +=
6130                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6131                         if (data_bytes_allocated < root->sectorsize) {
6132                                 abort();
6133                         }
6134                         data_bytes_referenced +=
6135                                 btrfs_file_extent_num_bytes(buf, fi);
6136                         add_data_backref(extent_cache,
6137                                 btrfs_file_extent_disk_bytenr(buf, fi),
6138                                 parent, owner, key.objectid, key.offset -
6139                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6140                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6141                 }
6142         } else {
6143                 int level;
6144                 struct btrfs_key first_key;
6145
6146                 first_key.objectid = 0;
6147
6148                 if (nritems > 0)
6149                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6150                 level = btrfs_header_level(buf);
6151                 for (i = 0; i < nritems; i++) {
6152                         ptr = btrfs_node_blockptr(buf, i);
6153                         size = btrfs_level_size(root, level - 1);
6154                         btrfs_node_key_to_cpu(buf, &key, i);
6155                         if (ri != NULL) {
6156                                 if ((level == ri->drop_level)
6157                                     && is_dropped_key(&key, &ri->drop_key)) {
6158                                         continue;
6159                                 }
6160                         }
6161                         ret = add_extent_rec(extent_cache, &key,
6162                                              btrfs_node_ptr_generation(buf, i),
6163                                              ptr, size, 0, 0, 1, 0, 1, 0,
6164                                              size);
6165                         BUG_ON(ret);
6166
6167                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6168
6169                         if (level > 1) {
6170                                 add_pending(nodes, seen, ptr, size);
6171                         } else {
6172                                 add_pending(pending, seen, ptr, size);
6173                         }
6174                 }
6175                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6176                                       nritems) * sizeof(struct btrfs_key_ptr);
6177         }
6178         total_btree_bytes += buf->len;
6179         if (fs_root_objectid(btrfs_header_owner(buf)))
6180                 total_fs_tree_bytes += buf->len;
6181         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6182                 total_extent_tree_bytes += buf->len;
6183         if (!found_old_backref &&
6184             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6185             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6186             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6187                 found_old_backref = 1;
6188 out:
6189         free_extent_buffer(buf);
6190         return ret;
6191 }
6192
6193 static int add_root_to_pending(struct extent_buffer *buf,
6194                                struct cache_tree *extent_cache,
6195                                struct cache_tree *pending,
6196                                struct cache_tree *seen,
6197                                struct cache_tree *nodes,
6198                                u64 objectid)
6199 {
6200         if (btrfs_header_level(buf) > 0)
6201                 add_pending(nodes, seen, buf->start, buf->len);
6202         else
6203                 add_pending(pending, seen, buf->start, buf->len);
6204         add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6205                        0, 1, 1, 0, 1, 0, buf->len);
6206
6207         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6208             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6209                 add_tree_backref(extent_cache, buf->start, buf->start,
6210                                  0, 1);
6211         else
6212                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6213         return 0;
6214 }
6215
6216 /* as we fix the tree, we might be deleting blocks that
6217  * we're tracking for repair.  This hook makes sure we
6218  * remove any backrefs for blocks as we are fixing them.
6219  */
6220 static int free_extent_hook(struct btrfs_trans_handle *trans,
6221                             struct btrfs_root *root,
6222                             u64 bytenr, u64 num_bytes, u64 parent,
6223                             u64 root_objectid, u64 owner, u64 offset,
6224                             int refs_to_drop)
6225 {
6226         struct extent_record *rec;
6227         struct cache_extent *cache;
6228         int is_data;
6229         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6230
6231         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6232         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6233         if (!cache)
6234                 return 0;
6235
6236         rec = container_of(cache, struct extent_record, cache);
6237         if (is_data) {
6238                 struct data_backref *back;
6239                 back = find_data_backref(rec, parent, root_objectid, owner,
6240                                          offset, 1, bytenr, num_bytes);
6241                 if (!back)
6242                         goto out;
6243                 if (back->node.found_ref) {
6244                         back->found_ref -= refs_to_drop;
6245                         if (rec->refs)
6246                                 rec->refs -= refs_to_drop;
6247                 }
6248                 if (back->node.found_extent_tree) {
6249                         back->num_refs -= refs_to_drop;
6250                         if (rec->extent_item_refs)
6251                                 rec->extent_item_refs -= refs_to_drop;
6252                 }
6253                 if (back->found_ref == 0)
6254                         back->node.found_ref = 0;
6255                 if (back->num_refs == 0)
6256                         back->node.found_extent_tree = 0;
6257
6258                 if (!back->node.found_extent_tree && back->node.found_ref) {
6259                         list_del(&back->node.list);
6260                         free(back);
6261                 }
6262         } else {
6263                 struct tree_backref *back;
6264                 back = find_tree_backref(rec, parent, root_objectid);
6265                 if (!back)
6266                         goto out;
6267                 if (back->node.found_ref) {
6268                         if (rec->refs)
6269                                 rec->refs--;
6270                         back->node.found_ref = 0;
6271                 }
6272                 if (back->node.found_extent_tree) {
6273                         if (rec->extent_item_refs)
6274                                 rec->extent_item_refs--;
6275                         back->node.found_extent_tree = 0;
6276                 }
6277                 if (!back->node.found_extent_tree && back->node.found_ref) {
6278                         list_del(&back->node.list);
6279                         free(back);
6280                 }
6281         }
6282         maybe_free_extent_rec(extent_cache, rec);
6283 out:
6284         return 0;
6285 }
6286
6287 static int delete_extent_records(struct btrfs_trans_handle *trans,
6288                                  struct btrfs_root *root,
6289                                  struct btrfs_path *path,
6290                                  u64 bytenr, u64 new_len)
6291 {
6292         struct btrfs_key key;
6293         struct btrfs_key found_key;
6294         struct extent_buffer *leaf;
6295         int ret;
6296         int slot;
6297
6298
6299         key.objectid = bytenr;
6300         key.type = (u8)-1;
6301         key.offset = (u64)-1;
6302
6303         while(1) {
6304                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6305                                         &key, path, 0, 1);
6306                 if (ret < 0)
6307                         break;
6308
6309                 if (ret > 0) {
6310                         ret = 0;
6311                         if (path->slots[0] == 0)
6312                                 break;
6313                         path->slots[0]--;
6314                 }
6315                 ret = 0;
6316
6317                 leaf = path->nodes[0];
6318                 slot = path->slots[0];
6319
6320                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6321                 if (found_key.objectid != bytenr)
6322                         break;
6323
6324                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6325                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6326                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6327                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6328                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6329                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6330                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6331                         btrfs_release_path(path);
6332                         if (found_key.type == 0) {
6333                                 if (found_key.offset == 0)
6334                                         break;
6335                                 key.offset = found_key.offset - 1;
6336                                 key.type = found_key.type;
6337                         }
6338                         key.type = found_key.type - 1;
6339                         key.offset = (u64)-1;
6340                         continue;
6341                 }
6342
6343                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6344                         found_key.objectid, found_key.type, found_key.offset);
6345
6346                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6347                 if (ret)
6348                         break;
6349                 btrfs_release_path(path);
6350
6351                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6352                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6353                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6354                                 found_key.offset : root->leafsize;
6355
6356                         ret = btrfs_update_block_group(trans, root, bytenr,
6357                                                        bytes, 0, 0);
6358                         if (ret)
6359                                 break;
6360                 }
6361         }
6362
6363         btrfs_release_path(path);
6364         return ret;
6365 }
6366
6367 /*
6368  * for a single backref, this will allocate a new extent
6369  * and add the backref to it.
6370  */
6371 static int record_extent(struct btrfs_trans_handle *trans,
6372                          struct btrfs_fs_info *info,
6373                          struct btrfs_path *path,
6374                          struct extent_record *rec,
6375                          struct extent_backref *back,
6376                          int allocated, u64 flags)
6377 {
6378         int ret;
6379         struct btrfs_root *extent_root = info->extent_root;
6380         struct extent_buffer *leaf;
6381         struct btrfs_key ins_key;
6382         struct btrfs_extent_item *ei;
6383         struct tree_backref *tback;
6384         struct data_backref *dback;
6385         struct btrfs_tree_block_info *bi;
6386
6387         if (!back->is_data)
6388                 rec->max_size = max_t(u64, rec->max_size,
6389                                     info->extent_root->leafsize);
6390
6391         if (!allocated) {
6392                 u32 item_size = sizeof(*ei);
6393
6394                 if (!back->is_data)
6395                         item_size += sizeof(*bi);
6396
6397                 ins_key.objectid = rec->start;
6398                 ins_key.offset = rec->max_size;
6399                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6400
6401                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6402                                         &ins_key, item_size);
6403                 if (ret)
6404                         goto fail;
6405
6406                 leaf = path->nodes[0];
6407                 ei = btrfs_item_ptr(leaf, path->slots[0],
6408                                     struct btrfs_extent_item);
6409
6410                 btrfs_set_extent_refs(leaf, ei, 0);
6411                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6412
6413                 if (back->is_data) {
6414                         btrfs_set_extent_flags(leaf, ei,
6415                                                BTRFS_EXTENT_FLAG_DATA);
6416                 } else {
6417                         struct btrfs_disk_key copy_key;;
6418
6419                         tback = (struct tree_backref *)back;
6420                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6421                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6422                                              sizeof(*bi));
6423
6424                         btrfs_set_disk_key_objectid(&copy_key,
6425                                                     rec->info_objectid);
6426                         btrfs_set_disk_key_type(&copy_key, 0);
6427                         btrfs_set_disk_key_offset(&copy_key, 0);
6428
6429                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6430                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6431
6432                         btrfs_set_extent_flags(leaf, ei,
6433                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6434                 }
6435
6436                 btrfs_mark_buffer_dirty(leaf);
6437                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6438                                                rec->max_size, 1, 0);
6439                 if (ret)
6440                         goto fail;
6441                 btrfs_release_path(path);
6442         }
6443
6444         if (back->is_data) {
6445                 u64 parent;
6446                 int i;
6447
6448                 dback = (struct data_backref *)back;
6449                 if (back->full_backref)
6450                         parent = dback->parent;
6451                 else
6452                         parent = 0;
6453
6454                 for (i = 0; i < dback->found_ref; i++) {
6455                         /* if parent != 0, we're doing a full backref
6456                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6457                          * just makes the backref allocator create a data
6458                          * backref
6459                          */
6460                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6461                                                    rec->start, rec->max_size,
6462                                                    parent,
6463                                                    dback->root,
6464                                                    parent ?
6465                                                    BTRFS_FIRST_FREE_OBJECTID :
6466                                                    dback->owner,
6467                                                    dback->offset);
6468                         if (ret)
6469                                 break;
6470                 }
6471                 fprintf(stderr, "adding new data backref"
6472                                 " on %llu %s %llu owner %llu"
6473                                 " offset %llu found %d\n",
6474                                 (unsigned long long)rec->start,
6475                                 back->full_backref ?
6476                                 "parent" : "root",
6477                                 back->full_backref ?
6478                                 (unsigned long long)parent :
6479                                 (unsigned long long)dback->root,
6480                                 (unsigned long long)dback->owner,
6481                                 (unsigned long long)dback->offset,
6482                                 dback->found_ref);
6483         } else {
6484                 u64 parent;
6485
6486                 tback = (struct tree_backref *)back;
6487                 if (back->full_backref)
6488                         parent = tback->parent;
6489                 else
6490                         parent = 0;
6491
6492                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6493                                            rec->start, rec->max_size,
6494                                            parent, tback->root, 0, 0);
6495                 fprintf(stderr, "adding new tree backref on "
6496                         "start %llu len %llu parent %llu root %llu\n",
6497                         rec->start, rec->max_size, parent, tback->root);
6498         }
6499 fail:
6500         btrfs_release_path(path);
6501         return ret;
6502 }
6503
6504 struct extent_entry {
6505         u64 bytenr;
6506         u64 bytes;
6507         int count;
6508         int broken;
6509         struct list_head list;
6510 };
6511
6512 static struct extent_entry *find_entry(struct list_head *entries,
6513                                        u64 bytenr, u64 bytes)
6514 {
6515         struct extent_entry *entry = NULL;
6516
6517         list_for_each_entry(entry, entries, list) {
6518                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6519                         return entry;
6520         }
6521
6522         return NULL;
6523 }
6524
6525 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6526 {
6527         struct extent_entry *entry, *best = NULL, *prev = NULL;
6528
6529         list_for_each_entry(entry, entries, list) {
6530                 if (!prev) {
6531                         prev = entry;
6532                         continue;
6533                 }
6534
6535                 /*
6536                  * If there are as many broken entries as entries then we know
6537                  * not to trust this particular entry.
6538                  */
6539                 if (entry->broken == entry->count)
6540                         continue;
6541
6542                 /*
6543                  * If our current entry == best then we can't be sure our best
6544                  * is really the best, so we need to keep searching.
6545                  */
6546                 if (best && best->count == entry->count) {
6547                         prev = entry;
6548                         best = NULL;
6549                         continue;
6550                 }
6551
6552                 /* Prev == entry, not good enough, have to keep searching */
6553                 if (!prev->broken && prev->count == entry->count)
6554                         continue;
6555
6556                 if (!best)
6557                         best = (prev->count > entry->count) ? prev : entry;
6558                 else if (best->count < entry->count)
6559                         best = entry;
6560                 prev = entry;
6561         }
6562
6563         return best;
6564 }
6565
6566 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6567                       struct data_backref *dback, struct extent_entry *entry)
6568 {
6569         struct btrfs_trans_handle *trans;
6570         struct btrfs_root *root;
6571         struct btrfs_file_extent_item *fi;
6572         struct extent_buffer *leaf;
6573         struct btrfs_key key;
6574         u64 bytenr, bytes;
6575         int ret, err;
6576
6577         key.objectid = dback->root;
6578         key.type = BTRFS_ROOT_ITEM_KEY;
6579         key.offset = (u64)-1;
6580         root = btrfs_read_fs_root(info, &key);
6581         if (IS_ERR(root)) {
6582                 fprintf(stderr, "Couldn't find root for our ref\n");
6583                 return -EINVAL;
6584         }
6585
6586         /*
6587          * The backref points to the original offset of the extent if it was
6588          * split, so we need to search down to the offset we have and then walk
6589          * forward until we find the backref we're looking for.
6590          */
6591         key.objectid = dback->owner;
6592         key.type = BTRFS_EXTENT_DATA_KEY;
6593         key.offset = dback->offset;
6594         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6595         if (ret < 0) {
6596                 fprintf(stderr, "Error looking up ref %d\n", ret);
6597                 return ret;
6598         }
6599
6600         while (1) {
6601                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6602                         ret = btrfs_next_leaf(root, path);
6603                         if (ret) {
6604                                 fprintf(stderr, "Couldn't find our ref, next\n");
6605                                 return -EINVAL;
6606                         }
6607                 }
6608                 leaf = path->nodes[0];
6609                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6610                 if (key.objectid != dback->owner ||
6611                     key.type != BTRFS_EXTENT_DATA_KEY) {
6612                         fprintf(stderr, "Couldn't find our ref, search\n");
6613                         return -EINVAL;
6614                 }
6615                 fi = btrfs_item_ptr(leaf, path->slots[0],
6616                                     struct btrfs_file_extent_item);
6617                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6618                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6619
6620                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6621                         break;
6622                 path->slots[0]++;
6623         }
6624
6625         btrfs_release_path(path);
6626
6627         trans = btrfs_start_transaction(root, 1);
6628         if (IS_ERR(trans))
6629                 return PTR_ERR(trans);
6630
6631         /*
6632          * Ok we have the key of the file extent we want to fix, now we can cow
6633          * down to the thing and fix it.
6634          */
6635         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6636         if (ret < 0) {
6637                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6638                         key.objectid, key.type, key.offset, ret);
6639                 goto out;
6640         }
6641         if (ret > 0) {
6642                 fprintf(stderr, "Well that's odd, we just found this key "
6643                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6644                         key.offset);
6645                 ret = -EINVAL;
6646                 goto out;
6647         }
6648         leaf = path->nodes[0];
6649         fi = btrfs_item_ptr(leaf, path->slots[0],
6650                             struct btrfs_file_extent_item);
6651
6652         if (btrfs_file_extent_compression(leaf, fi) &&
6653             dback->disk_bytenr != entry->bytenr) {
6654                 fprintf(stderr, "Ref doesn't match the record start and is "
6655                         "compressed, please take a btrfs-image of this file "
6656                         "system and send it to a btrfs developer so they can "
6657                         "complete this functionality for bytenr %Lu\n",
6658                         dback->disk_bytenr);
6659                 ret = -EINVAL;
6660                 goto out;
6661         }
6662
6663         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6664                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6665         } else if (dback->disk_bytenr > entry->bytenr) {
6666                 u64 off_diff, offset;
6667
6668                 off_diff = dback->disk_bytenr - entry->bytenr;
6669                 offset = btrfs_file_extent_offset(leaf, fi);
6670                 if (dback->disk_bytenr + offset +
6671                     btrfs_file_extent_num_bytes(leaf, fi) >
6672                     entry->bytenr + entry->bytes) {
6673                         fprintf(stderr, "Ref is past the entry end, please "
6674                                 "take a btrfs-image of this file system and "
6675                                 "send it to a btrfs developer, ref %Lu\n",
6676                                 dback->disk_bytenr);
6677                         ret = -EINVAL;
6678                         goto out;
6679                 }
6680                 offset += off_diff;
6681                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6682                 btrfs_set_file_extent_offset(leaf, fi, offset);
6683         } else if (dback->disk_bytenr < entry->bytenr) {
6684                 u64 offset;
6685
6686                 offset = btrfs_file_extent_offset(leaf, fi);
6687                 if (dback->disk_bytenr + offset < entry->bytenr) {
6688                         fprintf(stderr, "Ref is before the entry start, please"
6689                                 " take a btrfs-image of this file system and "
6690                                 "send it to a btrfs developer, ref %Lu\n",
6691                                 dback->disk_bytenr);
6692                         ret = -EINVAL;
6693                         goto out;
6694                 }
6695
6696                 offset += dback->disk_bytenr;
6697                 offset -= entry->bytenr;
6698                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6699                 btrfs_set_file_extent_offset(leaf, fi, offset);
6700         }
6701
6702         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6703
6704         /*
6705          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6706          * only do this if we aren't using compression, otherwise it's a
6707          * trickier case.
6708          */
6709         if (!btrfs_file_extent_compression(leaf, fi))
6710                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6711         else
6712                 printf("ram bytes may be wrong?\n");
6713         btrfs_mark_buffer_dirty(leaf);
6714 out:
6715         err = btrfs_commit_transaction(trans, root);
6716         btrfs_release_path(path);
6717         return ret ? ret : err;
6718 }
6719
6720 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6721                            struct extent_record *rec)
6722 {
6723         struct extent_backref *back;
6724         struct data_backref *dback;
6725         struct extent_entry *entry, *best = NULL;
6726         LIST_HEAD(entries);
6727         int nr_entries = 0;
6728         int broken_entries = 0;
6729         int ret = 0;
6730         short mismatch = 0;
6731
6732         /*
6733          * Metadata is easy and the backrefs should always agree on bytenr and
6734          * size, if not we've got bigger issues.
6735          */
6736         if (rec->metadata)
6737                 return 0;
6738
6739         list_for_each_entry(back, &rec->backrefs, list) {
6740                 if (back->full_backref || !back->is_data)
6741                         continue;
6742
6743                 dback = (struct data_backref *)back;
6744
6745                 /*
6746                  * We only pay attention to backrefs that we found a real
6747                  * backref for.
6748                  */
6749                 if (dback->found_ref == 0)
6750                         continue;
6751
6752                 /*
6753                  * For now we only catch when the bytes don't match, not the
6754                  * bytenr.  We can easily do this at the same time, but I want
6755                  * to have a fs image to test on before we just add repair
6756                  * functionality willy-nilly so we know we won't screw up the
6757                  * repair.
6758                  */
6759
6760                 entry = find_entry(&entries, dback->disk_bytenr,
6761                                    dback->bytes);
6762                 if (!entry) {
6763                         entry = malloc(sizeof(struct extent_entry));
6764                         if (!entry) {
6765                                 ret = -ENOMEM;
6766                                 goto out;
6767                         }
6768                         memset(entry, 0, sizeof(*entry));
6769                         entry->bytenr = dback->disk_bytenr;
6770                         entry->bytes = dback->bytes;
6771                         list_add_tail(&entry->list, &entries);
6772                         nr_entries++;
6773                 }
6774
6775                 /*
6776                  * If we only have on entry we may think the entries agree when
6777                  * in reality they don't so we have to do some extra checking.
6778                  */
6779                 if (dback->disk_bytenr != rec->start ||
6780                     dback->bytes != rec->nr || back->broken)
6781                         mismatch = 1;
6782
6783                 if (back->broken) {
6784                         entry->broken++;
6785                         broken_entries++;
6786                 }
6787
6788                 entry->count++;
6789         }
6790
6791         /* Yay all the backrefs agree, carry on good sir */
6792         if (nr_entries <= 1 && !mismatch)
6793                 goto out;
6794
6795         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6796                 "%Lu\n", rec->start);
6797
6798         /*
6799          * First we want to see if the backrefs can agree amongst themselves who
6800          * is right, so figure out which one of the entries has the highest
6801          * count.
6802          */
6803         best = find_most_right_entry(&entries);
6804
6805         /*
6806          * Ok so we may have an even split between what the backrefs think, so
6807          * this is where we use the extent ref to see what it thinks.
6808          */
6809         if (!best) {
6810                 entry = find_entry(&entries, rec->start, rec->nr);
6811                 if (!entry && (!broken_entries || !rec->found_rec)) {
6812                         fprintf(stderr, "Backrefs don't agree with each other "
6813                                 "and extent record doesn't agree with anybody,"
6814                                 " so we can't fix bytenr %Lu bytes %Lu\n",
6815                                 rec->start, rec->nr);
6816                         ret = -EINVAL;
6817                         goto out;
6818                 } else if (!entry) {
6819                         /*
6820                          * Ok our backrefs were broken, we'll assume this is the
6821                          * correct value and add an entry for this range.
6822                          */
6823                         entry = malloc(sizeof(struct extent_entry));
6824                         if (!entry) {
6825                                 ret = -ENOMEM;
6826                                 goto out;
6827                         }
6828                         memset(entry, 0, sizeof(*entry));
6829                         entry->bytenr = rec->start;
6830                         entry->bytes = rec->nr;
6831                         list_add_tail(&entry->list, &entries);
6832                         nr_entries++;
6833                 }
6834                 entry->count++;
6835                 best = find_most_right_entry(&entries);
6836                 if (!best) {
6837                         fprintf(stderr, "Backrefs and extent record evenly "
6838                                 "split on who is right, this is going to "
6839                                 "require user input to fix bytenr %Lu bytes "
6840                                 "%Lu\n", rec->start, rec->nr);
6841                         ret = -EINVAL;
6842                         goto out;
6843                 }
6844         }
6845
6846         /*
6847          * I don't think this can happen currently as we'll abort() if we catch
6848          * this case higher up, but in case somebody removes that we still can't
6849          * deal with it properly here yet, so just bail out of that's the case.
6850          */
6851         if (best->bytenr != rec->start) {
6852                 fprintf(stderr, "Extent start and backref starts don't match, "
6853                         "please use btrfs-image on this file system and send "
6854                         "it to a btrfs developer so they can make fsck fix "
6855                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
6856                         rec->start, rec->nr);
6857                 ret = -EINVAL;
6858                 goto out;
6859         }
6860
6861         /*
6862          * Ok great we all agreed on an extent record, let's go find the real
6863          * references and fix up the ones that don't match.
6864          */
6865         list_for_each_entry(back, &rec->backrefs, list) {
6866                 if (back->full_backref || !back->is_data)
6867                         continue;
6868
6869                 dback = (struct data_backref *)back;
6870
6871                 /*
6872                  * Still ignoring backrefs that don't have a real ref attached
6873                  * to them.
6874                  */
6875                 if (dback->found_ref == 0)
6876                         continue;
6877
6878                 if (dback->bytes == best->bytes &&
6879                     dback->disk_bytenr == best->bytenr)
6880                         continue;
6881
6882                 ret = repair_ref(info, path, dback, best);
6883                 if (ret)
6884                         goto out;
6885         }
6886
6887         /*
6888          * Ok we messed with the actual refs, which means we need to drop our
6889          * entire cache and go back and rescan.  I know this is a huge pain and
6890          * adds a lot of extra work, but it's the only way to be safe.  Once all
6891          * the backrefs agree we may not need to do anything to the extent
6892          * record itself.
6893          */
6894         ret = -EAGAIN;
6895 out:
6896         while (!list_empty(&entries)) {
6897                 entry = list_entry(entries.next, struct extent_entry, list);
6898                 list_del_init(&entry->list);
6899                 free(entry);
6900         }
6901         return ret;
6902 }
6903
6904 static int process_duplicates(struct btrfs_root *root,
6905                               struct cache_tree *extent_cache,
6906                               struct extent_record *rec)
6907 {
6908         struct extent_record *good, *tmp;
6909         struct cache_extent *cache;
6910         int ret;
6911
6912         /*
6913          * If we found a extent record for this extent then return, or if we
6914          * have more than one duplicate we are likely going to need to delete
6915          * something.
6916          */
6917         if (rec->found_rec || rec->num_duplicates > 1)
6918                 return 0;
6919
6920         /* Shouldn't happen but just in case */
6921         BUG_ON(!rec->num_duplicates);
6922
6923         /*
6924          * So this happens if we end up with a backref that doesn't match the
6925          * actual extent entry.  So either the backref is bad or the extent
6926          * entry is bad.  Either way we want to have the extent_record actually
6927          * reflect what we found in the extent_tree, so we need to take the
6928          * duplicate out and use that as the extent_record since the only way we
6929          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6930          */
6931         remove_cache_extent(extent_cache, &rec->cache);
6932
6933         good = list_entry(rec->dups.next, struct extent_record, list);
6934         list_del_init(&good->list);
6935         INIT_LIST_HEAD(&good->backrefs);
6936         INIT_LIST_HEAD(&good->dups);
6937         good->cache.start = good->start;
6938         good->cache.size = good->nr;
6939         good->content_checked = 0;
6940         good->owner_ref_checked = 0;
6941         good->num_duplicates = 0;
6942         good->refs = rec->refs;
6943         list_splice_init(&rec->backrefs, &good->backrefs);
6944         while (1) {
6945                 cache = lookup_cache_extent(extent_cache, good->start,
6946                                             good->nr);
6947                 if (!cache)
6948                         break;
6949                 tmp = container_of(cache, struct extent_record, cache);
6950
6951                 /*
6952                  * If we find another overlapping extent and it's found_rec is
6953                  * set then it's a duplicate and we need to try and delete
6954                  * something.
6955                  */
6956                 if (tmp->found_rec || tmp->num_duplicates > 0) {
6957                         if (list_empty(&good->list))
6958                                 list_add_tail(&good->list,
6959                                               &duplicate_extents);
6960                         good->num_duplicates += tmp->num_duplicates + 1;
6961                         list_splice_init(&tmp->dups, &good->dups);
6962                         list_del_init(&tmp->list);
6963                         list_add_tail(&tmp->list, &good->dups);
6964                         remove_cache_extent(extent_cache, &tmp->cache);
6965                         continue;
6966                 }
6967
6968                 /*
6969                  * Ok we have another non extent item backed extent rec, so lets
6970                  * just add it to this extent and carry on like we did above.
6971                  */
6972                 good->refs += tmp->refs;
6973                 list_splice_init(&tmp->backrefs, &good->backrefs);
6974                 remove_cache_extent(extent_cache, &tmp->cache);
6975                 free(tmp);
6976         }
6977         ret = insert_cache_extent(extent_cache, &good->cache);
6978         BUG_ON(ret);
6979         free(rec);
6980         return good->num_duplicates ? 0 : 1;
6981 }
6982
6983 static int delete_duplicate_records(struct btrfs_root *root,
6984                                     struct extent_record *rec)
6985 {
6986         struct btrfs_trans_handle *trans;
6987         LIST_HEAD(delete_list);
6988         struct btrfs_path *path;
6989         struct extent_record *tmp, *good, *n;
6990         int nr_del = 0;
6991         int ret = 0, err;
6992         struct btrfs_key key;
6993
6994         path = btrfs_alloc_path();
6995         if (!path) {
6996                 ret = -ENOMEM;
6997                 goto out;
6998         }
6999
7000         good = rec;
7001         /* Find the record that covers all of the duplicates. */
7002         list_for_each_entry(tmp, &rec->dups, list) {
7003                 if (good->start < tmp->start)
7004                         continue;
7005                 if (good->nr > tmp->nr)
7006                         continue;
7007
7008                 if (tmp->start + tmp->nr < good->start + good->nr) {
7009                         fprintf(stderr, "Ok we have overlapping extents that "
7010                                 "aren't completely covered by eachother, this "
7011                                 "is going to require more careful thought.  "
7012                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7013                                 tmp->start, tmp->nr, good->start, good->nr);
7014                         abort();
7015                 }
7016                 good = tmp;
7017         }
7018
7019         if (good != rec)
7020                 list_add_tail(&rec->list, &delete_list);
7021
7022         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7023                 if (tmp == good)
7024                         continue;
7025                 list_move_tail(&tmp->list, &delete_list);
7026         }
7027
7028         root = root->fs_info->extent_root;
7029         trans = btrfs_start_transaction(root, 1);
7030         if (IS_ERR(trans)) {
7031                 ret = PTR_ERR(trans);
7032                 goto out;
7033         }
7034
7035         list_for_each_entry(tmp, &delete_list, list) {
7036                 if (tmp->found_rec == 0)
7037                         continue;
7038                 key.objectid = tmp->start;
7039                 key.type = BTRFS_EXTENT_ITEM_KEY;
7040                 key.offset = tmp->nr;
7041
7042                 /* Shouldn't happen but just in case */
7043                 if (tmp->metadata) {
7044                         fprintf(stderr, "Well this shouldn't happen, extent "
7045                                 "record overlaps but is metadata? "
7046                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7047                         abort();
7048                 }
7049
7050                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7051                 if (ret) {
7052                         if (ret > 0)
7053                                 ret = -EINVAL;
7054                         break;
7055                 }
7056                 ret = btrfs_del_item(trans, root, path);
7057                 if (ret)
7058                         break;
7059                 btrfs_release_path(path);
7060                 nr_del++;
7061         }
7062         err = btrfs_commit_transaction(trans, root);
7063         if (err && !ret)
7064                 ret = err;
7065 out:
7066         while (!list_empty(&delete_list)) {
7067                 tmp = list_entry(delete_list.next, struct extent_record, list);
7068                 list_del_init(&tmp->list);
7069                 if (tmp == rec)
7070                         continue;
7071                 free(tmp);
7072         }
7073
7074         while (!list_empty(&rec->dups)) {
7075                 tmp = list_entry(rec->dups.next, struct extent_record, list);
7076                 list_del_init(&tmp->list);
7077                 free(tmp);
7078         }
7079
7080         btrfs_free_path(path);
7081
7082         if (!ret && !nr_del)
7083                 rec->num_duplicates = 0;
7084
7085         return ret ? ret : nr_del;
7086 }
7087
7088 static int find_possible_backrefs(struct btrfs_fs_info *info,
7089                                   struct btrfs_path *path,
7090                                   struct cache_tree *extent_cache,
7091                                   struct extent_record *rec)
7092 {
7093         struct btrfs_root *root;
7094         struct extent_backref *back;
7095         struct data_backref *dback;
7096         struct cache_extent *cache;
7097         struct btrfs_file_extent_item *fi;
7098         struct btrfs_key key;
7099         u64 bytenr, bytes;
7100         int ret;
7101
7102         list_for_each_entry(back, &rec->backrefs, list) {
7103                 /* Don't care about full backrefs (poor unloved backrefs) */
7104                 if (back->full_backref || !back->is_data)
7105                         continue;
7106
7107                 dback = (struct data_backref *)back;
7108
7109                 /* We found this one, we don't need to do a lookup */
7110                 if (dback->found_ref)
7111                         continue;
7112
7113                 key.objectid = dback->root;
7114                 key.type = BTRFS_ROOT_ITEM_KEY;
7115                 key.offset = (u64)-1;
7116
7117                 root = btrfs_read_fs_root(info, &key);
7118
7119                 /* No root, definitely a bad ref, skip */
7120                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7121                         continue;
7122                 /* Other err, exit */
7123                 if (IS_ERR(root))
7124                         return PTR_ERR(root);
7125
7126                 key.objectid = dback->owner;
7127                 key.type = BTRFS_EXTENT_DATA_KEY;
7128                 key.offset = dback->offset;
7129                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7130                 if (ret) {
7131                         btrfs_release_path(path);
7132                         if (ret < 0)
7133                                 return ret;
7134                         /* Didn't find it, we can carry on */
7135                         ret = 0;
7136                         continue;
7137                 }
7138
7139                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7140                                     struct btrfs_file_extent_item);
7141                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7142                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7143                 btrfs_release_path(path);
7144                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7145                 if (cache) {
7146                         struct extent_record *tmp;
7147                         tmp = container_of(cache, struct extent_record, cache);
7148
7149                         /*
7150                          * If we found an extent record for the bytenr for this
7151                          * particular backref then we can't add it to our
7152                          * current extent record.  We only want to add backrefs
7153                          * that don't have a corresponding extent item in the
7154                          * extent tree since they likely belong to this record
7155                          * and we need to fix it if it doesn't match bytenrs.
7156                          */
7157                         if  (tmp->found_rec)
7158                                 continue;
7159                 }
7160
7161                 dback->found_ref += 1;
7162                 dback->disk_bytenr = bytenr;
7163                 dback->bytes = bytes;
7164
7165                 /*
7166                  * Set this so the verify backref code knows not to trust the
7167                  * values in this backref.
7168                  */
7169                 back->broken = 1;
7170         }
7171
7172         return 0;
7173 }
7174
7175 /*
7176  * Record orphan data ref into corresponding root.
7177  *
7178  * Return 0 if the extent item contains data ref and recorded.
7179  * Return 1 if the extent item contains no useful data ref
7180  *   On that case, it may contains only shared_dataref or metadata backref
7181  *   or the file extent exists(this should be handled by the extent bytenr
7182  *   recovery routine)
7183  * Return <0 if something goes wrong.
7184  */
7185 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7186                                       struct extent_record *rec)
7187 {
7188         struct btrfs_key key;
7189         struct btrfs_root *dest_root;
7190         struct extent_backref *back;
7191         struct data_backref *dback;
7192         struct orphan_data_extent *orphan;
7193         struct btrfs_path *path;
7194         int recorded_data_ref = 0;
7195         int ret = 0;
7196
7197         if (rec->metadata)
7198                 return 1;
7199         path = btrfs_alloc_path();
7200         if (!path)
7201                 return -ENOMEM;
7202         list_for_each_entry(back, &rec->backrefs, list) {
7203                 if (back->full_backref || !back->is_data ||
7204                     !back->found_extent_tree)
7205                         continue;
7206                 dback = (struct data_backref *)back;
7207                 if (dback->found_ref)
7208                         continue;
7209                 key.objectid = dback->root;
7210                 key.type = BTRFS_ROOT_ITEM_KEY;
7211                 key.offset = (u64)-1;
7212
7213                 dest_root = btrfs_read_fs_root(fs_info, &key);
7214
7215                 /* For non-exist root we just skip it */
7216                 if (IS_ERR(dest_root) || !dest_root)
7217                         continue;
7218
7219                 key.objectid = dback->owner;
7220                 key.type = BTRFS_EXTENT_DATA_KEY;
7221                 key.offset = dback->offset;
7222
7223                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7224                 /*
7225                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7226                  * we need to record it for inode/file extent rebuild.
7227                  * For ret > 0, we record it only for file extent rebuild.
7228                  * For ret == 0, the file extent exists but only bytenr
7229                  * mismatch, let the original bytenr fix routine to handle,
7230                  * don't record it.
7231                  */
7232                 if (ret == 0)
7233                         continue;
7234                 ret = 0;
7235                 orphan = malloc(sizeof(*orphan));
7236                 if (!orphan) {
7237                         ret = -ENOMEM;
7238                         goto out;
7239                 }
7240                 INIT_LIST_HEAD(&orphan->list);
7241                 orphan->root = dback->root;
7242                 orphan->objectid = dback->owner;
7243                 orphan->offset = dback->offset;
7244                 orphan->disk_bytenr = rec->cache.start;
7245                 orphan->disk_len = rec->cache.size;
7246                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7247                 recorded_data_ref = 1;
7248         }
7249 out:
7250         btrfs_free_path(path);
7251         if (!ret)
7252                 return !recorded_data_ref;
7253         else
7254                 return ret;
7255 }
7256
7257 /*
7258  * when an incorrect extent item is found, this will delete
7259  * all of the existing entries for it and recreate them
7260  * based on what the tree scan found.
7261  */
7262 static int fixup_extent_refs(struct btrfs_fs_info *info,
7263                              struct cache_tree *extent_cache,
7264                              struct extent_record *rec)
7265 {
7266         struct btrfs_trans_handle *trans = NULL;
7267         int ret;
7268         struct btrfs_path *path;
7269         struct list_head *cur = rec->backrefs.next;
7270         struct cache_extent *cache;
7271         struct extent_backref *back;
7272         int allocated = 0;
7273         u64 flags = 0;
7274
7275         if (rec->flag_block_full_backref)
7276                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7277
7278         path = btrfs_alloc_path();
7279         if (!path)
7280                 return -ENOMEM;
7281
7282         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7283                 /*
7284                  * Sometimes the backrefs themselves are so broken they don't
7285                  * get attached to any meaningful rec, so first go back and
7286                  * check any of our backrefs that we couldn't find and throw
7287                  * them into the list if we find the backref so that
7288                  * verify_backrefs can figure out what to do.
7289                  */
7290                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7291                 if (ret < 0)
7292                         goto out;
7293         }
7294
7295         /* step one, make sure all of the backrefs agree */
7296         ret = verify_backrefs(info, path, rec);
7297         if (ret < 0)
7298                 goto out;
7299
7300         trans = btrfs_start_transaction(info->extent_root, 1);
7301         if (IS_ERR(trans)) {
7302                 ret = PTR_ERR(trans);
7303                 goto out;
7304         }
7305
7306         /* step two, delete all the existing records */
7307         ret = delete_extent_records(trans, info->extent_root, path,
7308                                     rec->start, rec->max_size);
7309
7310         if (ret < 0)
7311                 goto out;
7312
7313         /* was this block corrupt?  If so, don't add references to it */
7314         cache = lookup_cache_extent(info->corrupt_blocks,
7315                                     rec->start, rec->max_size);
7316         if (cache) {
7317                 ret = 0;
7318                 goto out;
7319         }
7320
7321         /* step three, recreate all the refs we did find */
7322         while(cur != &rec->backrefs) {
7323                 back = list_entry(cur, struct extent_backref, list);
7324                 cur = cur->next;
7325
7326                 /*
7327                  * if we didn't find any references, don't create a
7328                  * new extent record
7329                  */
7330                 if (!back->found_ref)
7331                         continue;
7332
7333                 rec->bad_full_backref = 0;
7334                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7335                 allocated = 1;
7336
7337                 if (ret)
7338                         goto out;
7339         }
7340 out:
7341         if (trans) {
7342                 int err = btrfs_commit_transaction(trans, info->extent_root);
7343                 if (!ret)
7344                         ret = err;
7345         }
7346
7347         btrfs_free_path(path);
7348         return ret;
7349 }
7350
7351 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7352                               struct extent_record *rec)
7353 {
7354         struct btrfs_trans_handle *trans;
7355         struct btrfs_root *root = fs_info->extent_root;
7356         struct btrfs_path *path;
7357         struct btrfs_extent_item *ei;
7358         struct btrfs_key key;
7359         u64 flags;
7360         int ret = 0;
7361
7362         key.objectid = rec->start;
7363         if (rec->metadata) {
7364                 key.type = BTRFS_METADATA_ITEM_KEY;
7365                 key.offset = rec->info_level;
7366         } else {
7367                 key.type = BTRFS_EXTENT_ITEM_KEY;
7368                 key.offset = rec->max_size;
7369         }
7370
7371         path = btrfs_alloc_path();
7372         if (!path)
7373                 return -ENOMEM;
7374
7375         trans = btrfs_start_transaction(root, 0);
7376         if (IS_ERR(trans)) {
7377                 btrfs_free_path(path);
7378                 return PTR_ERR(trans);
7379         }
7380
7381         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7382         if (ret < 0) {
7383                 btrfs_free_path(path);
7384                 btrfs_commit_transaction(trans, root);
7385                 return ret;
7386         } else if (ret) {
7387                 fprintf(stderr, "Didn't find extent for %llu\n",
7388                         (unsigned long long)rec->start);
7389                 btrfs_free_path(path);
7390                 btrfs_commit_transaction(trans, root);
7391                 return -ENOENT;
7392         }
7393
7394         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7395                             struct btrfs_extent_item);
7396         flags = btrfs_extent_flags(path->nodes[0], ei);
7397         if (rec->flag_block_full_backref) {
7398                 fprintf(stderr, "setting full backref on %llu\n",
7399                         (unsigned long long)key.objectid);
7400                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7401         } else {
7402                 fprintf(stderr, "clearing full backref on %llu\n",
7403                         (unsigned long long)key.objectid);
7404                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7405         }
7406         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7407         btrfs_mark_buffer_dirty(path->nodes[0]);
7408         btrfs_free_path(path);
7409         return btrfs_commit_transaction(trans, root);
7410 }
7411
7412 /* right now we only prune from the extent allocation tree */
7413 static int prune_one_block(struct btrfs_trans_handle *trans,
7414                            struct btrfs_fs_info *info,
7415                            struct btrfs_corrupt_block *corrupt)
7416 {
7417         int ret;
7418         struct btrfs_path path;
7419         struct extent_buffer *eb;
7420         u64 found;
7421         int slot;
7422         int nritems;
7423         int level = corrupt->level + 1;
7424
7425         btrfs_init_path(&path);
7426 again:
7427         /* we want to stop at the parent to our busted block */
7428         path.lowest_level = level;
7429
7430         ret = btrfs_search_slot(trans, info->extent_root,
7431                                 &corrupt->key, &path, -1, 1);
7432
7433         if (ret < 0)
7434                 goto out;
7435
7436         eb = path.nodes[level];
7437         if (!eb) {
7438                 ret = -ENOENT;
7439                 goto out;
7440         }
7441
7442         /*
7443          * hopefully the search gave us the block we want to prune,
7444          * lets try that first
7445          */
7446         slot = path.slots[level];
7447         found =  btrfs_node_blockptr(eb, slot);
7448         if (found == corrupt->cache.start)
7449                 goto del_ptr;
7450
7451         nritems = btrfs_header_nritems(eb);
7452
7453         /* the search failed, lets scan this node and hope we find it */
7454         for (slot = 0; slot < nritems; slot++) {
7455                 found =  btrfs_node_blockptr(eb, slot);
7456                 if (found == corrupt->cache.start)
7457                         goto del_ptr;
7458         }
7459         /*
7460          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7461          * to this block
7462          */
7463         if (eb == info->extent_root->node) {
7464                 ret = -ENOENT;
7465                 goto out;
7466         } else {
7467                 level++;
7468                 btrfs_release_path(&path);
7469                 goto again;
7470         }
7471
7472 del_ptr:
7473         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7474         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7475
7476 out:
7477         btrfs_release_path(&path);
7478         return ret;
7479 }
7480
7481 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7482 {
7483         struct btrfs_trans_handle *trans = NULL;
7484         struct cache_extent *cache;
7485         struct btrfs_corrupt_block *corrupt;
7486
7487         while (1) {
7488                 cache = search_cache_extent(info->corrupt_blocks, 0);
7489                 if (!cache)
7490                         break;
7491                 if (!trans) {
7492                         trans = btrfs_start_transaction(info->extent_root, 1);
7493                         if (IS_ERR(trans))
7494                                 return PTR_ERR(trans);
7495                 }
7496                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7497                 prune_one_block(trans, info, corrupt);
7498                 remove_cache_extent(info->corrupt_blocks, cache);
7499         }
7500         if (trans)
7501                 return btrfs_commit_transaction(trans, info->extent_root);
7502         return 0;
7503 }
7504
7505 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7506 {
7507         struct btrfs_block_group_cache *cache;
7508         u64 start, end;
7509         int ret;
7510
7511         while (1) {
7512                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7513                                             &start, &end, EXTENT_DIRTY);
7514                 if (ret)
7515                         break;
7516                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7517                                    GFP_NOFS);
7518         }
7519
7520         start = 0;
7521         while (1) {
7522                 cache = btrfs_lookup_first_block_group(fs_info, start);
7523                 if (!cache)
7524                         break;
7525                 if (cache->cached)
7526                         cache->cached = 0;
7527                 start = cache->key.objectid + cache->key.offset;
7528         }
7529 }
7530
7531 static int check_extent_refs(struct btrfs_root *root,
7532                              struct cache_tree *extent_cache)
7533 {
7534         struct extent_record *rec;
7535         struct cache_extent *cache;
7536         int err = 0;
7537         int ret = 0;
7538         int fixed = 0;
7539         int had_dups = 0;
7540         int recorded = 0;
7541
7542         if (repair) {
7543                 /*
7544                  * if we're doing a repair, we have to make sure
7545                  * we don't allocate from the problem extents.
7546                  * In the worst case, this will be all the
7547                  * extents in the FS
7548                  */
7549                 cache = search_cache_extent(extent_cache, 0);
7550                 while(cache) {
7551                         rec = container_of(cache, struct extent_record, cache);
7552                         set_extent_dirty(root->fs_info->excluded_extents,
7553                                          rec->start,
7554                                          rec->start + rec->max_size - 1,
7555                                          GFP_NOFS);
7556                         cache = next_cache_extent(cache);
7557                 }
7558
7559                 /* pin down all the corrupted blocks too */
7560                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7561                 while(cache) {
7562                         set_extent_dirty(root->fs_info->excluded_extents,
7563                                          cache->start,
7564                                          cache->start + cache->size - 1,
7565                                          GFP_NOFS);
7566                         cache = next_cache_extent(cache);
7567                 }
7568                 prune_corrupt_blocks(root->fs_info);
7569                 reset_cached_block_groups(root->fs_info);
7570         }
7571
7572         reset_cached_block_groups(root->fs_info);
7573
7574         /*
7575          * We need to delete any duplicate entries we find first otherwise we
7576          * could mess up the extent tree when we have backrefs that actually
7577          * belong to a different extent item and not the weird duplicate one.
7578          */
7579         while (repair && !list_empty(&duplicate_extents)) {
7580                 rec = list_entry(duplicate_extents.next, struct extent_record,
7581                                  list);
7582                 list_del_init(&rec->list);
7583
7584                 /* Sometimes we can find a backref before we find an actual
7585                  * extent, so we need to process it a little bit to see if there
7586                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7587                  * if this is a backref screwup.  If we need to delete stuff
7588                  * process_duplicates() will return 0, otherwise it will return
7589                  * 1 and we
7590                  */
7591                 if (process_duplicates(root, extent_cache, rec))
7592                         continue;
7593                 ret = delete_duplicate_records(root, rec);
7594                 if (ret < 0)
7595                         return ret;
7596                 /*
7597                  * delete_duplicate_records will return the number of entries
7598                  * deleted, so if it's greater than 0 then we know we actually
7599                  * did something and we need to remove.
7600                  */
7601                 if (ret)
7602                         had_dups = 1;
7603         }
7604
7605         if (had_dups)
7606                 return -EAGAIN;
7607
7608         while(1) {
7609                 int cur_err = 0;
7610
7611                 fixed = 0;
7612                 recorded = 0;
7613                 cache = search_cache_extent(extent_cache, 0);
7614                 if (!cache)
7615                         break;
7616                 rec = container_of(cache, struct extent_record, cache);
7617                 if (rec->num_duplicates) {
7618                         fprintf(stderr, "extent item %llu has multiple extent "
7619                                 "items\n", (unsigned long long)rec->start);
7620                         err = 1;
7621                         cur_err = 1;
7622                 }
7623
7624                 if (rec->refs != rec->extent_item_refs) {
7625                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7626                                 (unsigned long long)rec->start,
7627                                 (unsigned long long)rec->nr);
7628                         fprintf(stderr, "extent item %llu, found %llu\n",
7629                                 (unsigned long long)rec->extent_item_refs,
7630                                 (unsigned long long)rec->refs);
7631                         ret = record_orphan_data_extents(root->fs_info, rec);
7632                         if (ret < 0)
7633                                 goto repair_abort;
7634                         if (ret == 0) {
7635                                 recorded = 1;
7636                         } else {
7637                                 /*
7638                                  * we can't use the extent to repair file
7639                                  * extent, let the fallback method handle it.
7640                                  */
7641                                 if (!fixed && repair) {
7642                                         ret = fixup_extent_refs(
7643                                                         root->fs_info,
7644                                                         extent_cache, rec);
7645                                         if (ret)
7646                                                 goto repair_abort;
7647                                         fixed = 1;
7648                                 }
7649                         }
7650                         err = 1;
7651                         cur_err = 1;
7652                 }
7653                 if (all_backpointers_checked(rec, 1)) {
7654                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7655                                 (unsigned long long)rec->start,
7656                                 (unsigned long long)rec->nr);
7657
7658                         if (!fixed && !recorded && repair) {
7659                                 ret = fixup_extent_refs(root->fs_info,
7660                                                         extent_cache, rec);
7661                                 if (ret)
7662                                         goto repair_abort;
7663                                 fixed = 1;
7664                         }
7665                         cur_err = 1;
7666                         err = 1;
7667                 }
7668                 if (!rec->owner_ref_checked) {
7669                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7670                                 (unsigned long long)rec->start,
7671                                 (unsigned long long)rec->nr);
7672                         if (!fixed && !recorded && repair) {
7673                                 ret = fixup_extent_refs(root->fs_info,
7674                                                         extent_cache, rec);
7675                                 if (ret)
7676                                         goto repair_abort;
7677                                 fixed = 1;
7678                         }
7679                         err = 1;
7680                         cur_err = 1;
7681                 }
7682                 if (rec->bad_full_backref) {
7683                         fprintf(stderr, "bad full backref, on [%llu]\n",
7684                                 (unsigned long long)rec->start);
7685                         if (repair) {
7686                                 ret = fixup_extent_flags(root->fs_info, rec);
7687                                 if (ret)
7688                                         goto repair_abort;
7689                                 fixed = 1;
7690                         }
7691                         err = 1;
7692                         cur_err = 1;
7693                 }
7694                 /*
7695                  * Although it's not a extent ref's problem, we reuse this
7696                  * routine for error reporting.
7697                  * No repair function yet.
7698                  */
7699                 if (rec->crossing_stripes) {
7700                         fprintf(stderr,
7701                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7702                                 rec->start, rec->start + rec->max_size);
7703                         err = 1;
7704                         cur_err = 1;
7705                 }
7706
7707                 if (rec->wrong_chunk_type) {
7708                         fprintf(stderr,
7709                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7710                                 rec->start, rec->start + rec->max_size);
7711                         err = 1;
7712                         cur_err = 1;
7713                 }
7714
7715                 remove_cache_extent(extent_cache, cache);
7716                 free_all_extent_backrefs(rec);
7717                 if (!init_extent_tree && repair && (!cur_err || fixed))
7718                         clear_extent_dirty(root->fs_info->excluded_extents,
7719                                            rec->start,
7720                                            rec->start + rec->max_size - 1,
7721                                            GFP_NOFS);
7722                 free(rec);
7723         }
7724 repair_abort:
7725         if (repair) {
7726                 if (ret && ret != -EAGAIN) {
7727                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7728                         exit(1);
7729                 } else if (!ret) {
7730                         struct btrfs_trans_handle *trans;
7731
7732                         root = root->fs_info->extent_root;
7733                         trans = btrfs_start_transaction(root, 1);
7734                         if (IS_ERR(trans)) {
7735                                 ret = PTR_ERR(trans);
7736                                 goto repair_abort;
7737                         }
7738
7739                         btrfs_fix_block_accounting(trans, root);
7740                         ret = btrfs_commit_transaction(trans, root);
7741                         if (ret)
7742                                 goto repair_abort;
7743                 }
7744                 if (err)
7745                         fprintf(stderr, "repaired damaged extent references\n");
7746                 return ret;
7747         }
7748         return err;
7749 }
7750
7751 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7752 {
7753         u64 stripe_size;
7754
7755         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7756                 stripe_size = length;
7757                 stripe_size /= num_stripes;
7758         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7759                 stripe_size = length * 2;
7760                 stripe_size /= num_stripes;
7761         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7762                 stripe_size = length;
7763                 stripe_size /= (num_stripes - 1);
7764         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7765                 stripe_size = length;
7766                 stripe_size /= (num_stripes - 2);
7767         } else {
7768                 stripe_size = length;
7769         }
7770         return stripe_size;
7771 }
7772
7773 /*
7774  * Check the chunk with its block group/dev list ref:
7775  * Return 0 if all refs seems valid.
7776  * Return 1 if part of refs seems valid, need later check for rebuild ref
7777  * like missing block group and needs to search extent tree to rebuild them.
7778  * Return -1 if essential refs are missing and unable to rebuild.
7779  */
7780 static int check_chunk_refs(struct chunk_record *chunk_rec,
7781                             struct block_group_tree *block_group_cache,
7782                             struct device_extent_tree *dev_extent_cache,
7783                             int silent)
7784 {
7785         struct cache_extent *block_group_item;
7786         struct block_group_record *block_group_rec;
7787         struct cache_extent *dev_extent_item;
7788         struct device_extent_record *dev_extent_rec;
7789         u64 devid;
7790         u64 offset;
7791         u64 length;
7792         int metadump_v2 = 0;
7793         int i;
7794         int ret = 0;
7795
7796         block_group_item = lookup_cache_extent(&block_group_cache->tree,
7797                                                chunk_rec->offset,
7798                                                chunk_rec->length);
7799         if (block_group_item) {
7800                 block_group_rec = container_of(block_group_item,
7801                                                struct block_group_record,
7802                                                cache);
7803                 if (chunk_rec->length != block_group_rec->offset ||
7804                     chunk_rec->offset != block_group_rec->objectid ||
7805                     (!metadump_v2 &&
7806                      chunk_rec->type_flags != block_group_rec->flags)) {
7807                         if (!silent)
7808                                 fprintf(stderr,
7809                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7810                                         chunk_rec->objectid,
7811                                         chunk_rec->type,
7812                                         chunk_rec->offset,
7813                                         chunk_rec->length,
7814                                         chunk_rec->offset,
7815                                         chunk_rec->type_flags,
7816                                         block_group_rec->objectid,
7817                                         block_group_rec->type,
7818                                         block_group_rec->offset,
7819                                         block_group_rec->offset,
7820                                         block_group_rec->objectid,
7821                                         block_group_rec->flags);
7822                         ret = -1;
7823                 } else {
7824                         list_del_init(&block_group_rec->list);
7825                         chunk_rec->bg_rec = block_group_rec;
7826                 }
7827         } else {
7828                 if (!silent)
7829                         fprintf(stderr,
7830                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7831                                 chunk_rec->objectid,
7832                                 chunk_rec->type,
7833                                 chunk_rec->offset,
7834                                 chunk_rec->length,
7835                                 chunk_rec->offset,
7836                                 chunk_rec->type_flags);
7837                 ret = 1;
7838         }
7839
7840         if (metadump_v2)
7841                 return ret;
7842
7843         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7844                                     chunk_rec->num_stripes);
7845         for (i = 0; i < chunk_rec->num_stripes; ++i) {
7846                 devid = chunk_rec->stripes[i].devid;
7847                 offset = chunk_rec->stripes[i].offset;
7848                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7849                                                        devid, offset, length);
7850                 if (dev_extent_item) {
7851                         dev_extent_rec = container_of(dev_extent_item,
7852                                                 struct device_extent_record,
7853                                                 cache);
7854                         if (dev_extent_rec->objectid != devid ||
7855                             dev_extent_rec->offset != offset ||
7856                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
7857                             dev_extent_rec->length != length) {
7858                                 if (!silent)
7859                                         fprintf(stderr,
7860                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7861                                                 chunk_rec->objectid,
7862                                                 chunk_rec->type,
7863                                                 chunk_rec->offset,
7864                                                 chunk_rec->stripes[i].devid,
7865                                                 chunk_rec->stripes[i].offset,
7866                                                 dev_extent_rec->objectid,
7867                                                 dev_extent_rec->offset,
7868                                                 dev_extent_rec->length);
7869                                 ret = -1;
7870                         } else {
7871                                 list_move(&dev_extent_rec->chunk_list,
7872                                           &chunk_rec->dextents);
7873                         }
7874                 } else {
7875                         if (!silent)
7876                                 fprintf(stderr,
7877                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7878                                         chunk_rec->objectid,
7879                                         chunk_rec->type,
7880                                         chunk_rec->offset,
7881                                         chunk_rec->stripes[i].devid,
7882                                         chunk_rec->stripes[i].offset);
7883                         ret = -1;
7884                 }
7885         }
7886         return ret;
7887 }
7888
7889 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7890 int check_chunks(struct cache_tree *chunk_cache,
7891                  struct block_group_tree *block_group_cache,
7892                  struct device_extent_tree *dev_extent_cache,
7893                  struct list_head *good, struct list_head *bad,
7894                  struct list_head *rebuild, int silent)
7895 {
7896         struct cache_extent *chunk_item;
7897         struct chunk_record *chunk_rec;
7898         struct block_group_record *bg_rec;
7899         struct device_extent_record *dext_rec;
7900         int err;
7901         int ret = 0;
7902
7903         chunk_item = first_cache_extent(chunk_cache);
7904         while (chunk_item) {
7905                 chunk_rec = container_of(chunk_item, struct chunk_record,
7906                                          cache);
7907                 err = check_chunk_refs(chunk_rec, block_group_cache,
7908                                        dev_extent_cache, silent);
7909                 if (err < 0)
7910                         ret = err;
7911                 if (err == 0 && good)
7912                         list_add_tail(&chunk_rec->list, good);
7913                 if (err > 0 && rebuild)
7914                         list_add_tail(&chunk_rec->list, rebuild);
7915                 if (err < 0 && bad)
7916                         list_add_tail(&chunk_rec->list, bad);
7917                 chunk_item = next_cache_extent(chunk_item);
7918         }
7919
7920         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7921                 if (!silent)
7922                         fprintf(stderr,
7923                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7924                                 bg_rec->objectid,
7925                                 bg_rec->offset,
7926                                 bg_rec->flags);
7927                 if (!ret)
7928                         ret = 1;
7929         }
7930
7931         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7932                             chunk_list) {
7933                 if (!silent)
7934                         fprintf(stderr,
7935                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7936                                 dext_rec->objectid,
7937                                 dext_rec->offset,
7938                                 dext_rec->length);
7939                 if (!ret)
7940                         ret = 1;
7941         }
7942         return ret;
7943 }
7944
7945
7946 static int check_device_used(struct device_record *dev_rec,
7947                              struct device_extent_tree *dext_cache)
7948 {
7949         struct cache_extent *cache;
7950         struct device_extent_record *dev_extent_rec;
7951         u64 total_byte = 0;
7952
7953         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7954         while (cache) {
7955                 dev_extent_rec = container_of(cache,
7956                                               struct device_extent_record,
7957                                               cache);
7958                 if (dev_extent_rec->objectid != dev_rec->devid)
7959                         break;
7960
7961                 list_del_init(&dev_extent_rec->device_list);
7962                 total_byte += dev_extent_rec->length;
7963                 cache = next_cache_extent(cache);
7964         }
7965
7966         if (total_byte != dev_rec->byte_used) {
7967                 fprintf(stderr,
7968                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7969                         total_byte, dev_rec->byte_used, dev_rec->objectid,
7970                         dev_rec->type, dev_rec->offset);
7971                 return -1;
7972         } else {
7973                 return 0;
7974         }
7975 }
7976
7977 /* check btrfs_dev_item -> btrfs_dev_extent */
7978 static int check_devices(struct rb_root *dev_cache,
7979                          struct device_extent_tree *dev_extent_cache)
7980 {
7981         struct rb_node *dev_node;
7982         struct device_record *dev_rec;
7983         struct device_extent_record *dext_rec;
7984         int err;
7985         int ret = 0;
7986
7987         dev_node = rb_first(dev_cache);
7988         while (dev_node) {
7989                 dev_rec = container_of(dev_node, struct device_record, node);
7990                 err = check_device_used(dev_rec, dev_extent_cache);
7991                 if (err)
7992                         ret = err;
7993
7994                 dev_node = rb_next(dev_node);
7995         }
7996         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7997                             device_list) {
7998                 fprintf(stderr,
7999                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8000                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8001                 if (!ret)
8002                         ret = 1;
8003         }
8004         return ret;
8005 }
8006
8007 static int add_root_item_to_list(struct list_head *head,
8008                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8009                                   u8 level, u8 drop_level,
8010                                   int level_size, struct btrfs_key *drop_key)
8011 {
8012
8013         struct root_item_record *ri_rec;
8014         ri_rec = malloc(sizeof(*ri_rec));
8015         if (!ri_rec)
8016                 return -ENOMEM;
8017         ri_rec->bytenr = bytenr;
8018         ri_rec->objectid = objectid;
8019         ri_rec->level = level;
8020         ri_rec->level_size = level_size;
8021         ri_rec->drop_level = drop_level;
8022         ri_rec->last_snapshot = last_snapshot;
8023         if (drop_key)
8024                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8025         list_add_tail(&ri_rec->list, head);
8026
8027         return 0;
8028 }
8029
8030 static void free_root_item_list(struct list_head *list)
8031 {
8032         struct root_item_record *ri_rec;
8033
8034         while (!list_empty(list)) {
8035                 ri_rec = list_first_entry(list, struct root_item_record,
8036                                           list);
8037                 list_del_init(&ri_rec->list);
8038                 free(ri_rec);
8039         }
8040 }
8041
8042 static int deal_root_from_list(struct list_head *list,
8043                                struct btrfs_root *root,
8044                                struct block_info *bits,
8045                                int bits_nr,
8046                                struct cache_tree *pending,
8047                                struct cache_tree *seen,
8048                                struct cache_tree *reada,
8049                                struct cache_tree *nodes,
8050                                struct cache_tree *extent_cache,
8051                                struct cache_tree *chunk_cache,
8052                                struct rb_root *dev_cache,
8053                                struct block_group_tree *block_group_cache,
8054                                struct device_extent_tree *dev_extent_cache)
8055 {
8056         int ret = 0;
8057         u64 last;
8058
8059         while (!list_empty(list)) {
8060                 struct root_item_record *rec;
8061                 struct extent_buffer *buf;
8062                 rec = list_entry(list->next,
8063                                  struct root_item_record, list);
8064                 last = 0;
8065                 buf = read_tree_block(root->fs_info->tree_root,
8066                                       rec->bytenr, rec->level_size, 0);
8067                 if (!extent_buffer_uptodate(buf)) {
8068                         free_extent_buffer(buf);
8069                         ret = -EIO;
8070                         break;
8071                 }
8072                 add_root_to_pending(buf, extent_cache, pending,
8073                                     seen, nodes, rec->objectid);
8074                 /*
8075                  * To rebuild extent tree, we need deal with snapshot
8076                  * one by one, otherwise we deal with node firstly which
8077                  * can maximize readahead.
8078                  */
8079                 while (1) {
8080                         ret = run_next_block(root, bits, bits_nr, &last,
8081                                              pending, seen, reada, nodes,
8082                                              extent_cache, chunk_cache,
8083                                              dev_cache, block_group_cache,
8084                                              dev_extent_cache, rec);
8085                         if (ret != 0)
8086                                 break;
8087                 }
8088                 free_extent_buffer(buf);
8089                 list_del(&rec->list);
8090                 free(rec);
8091                 if (ret < 0)
8092                         break;
8093         }
8094         while (ret >= 0) {
8095                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8096                                      reada, nodes, extent_cache, chunk_cache,
8097                                      dev_cache, block_group_cache,
8098                                      dev_extent_cache, NULL);
8099                 if (ret != 0) {
8100                         if (ret > 0)
8101                                 ret = 0;
8102                         break;
8103                 }
8104         }
8105         return ret;
8106 }
8107
8108 static int check_chunks_and_extents(struct btrfs_root *root)
8109 {
8110         struct rb_root dev_cache;
8111         struct cache_tree chunk_cache;
8112         struct block_group_tree block_group_cache;
8113         struct device_extent_tree dev_extent_cache;
8114         struct cache_tree extent_cache;
8115         struct cache_tree seen;
8116         struct cache_tree pending;
8117         struct cache_tree reada;
8118         struct cache_tree nodes;
8119         struct extent_io_tree excluded_extents;
8120         struct cache_tree corrupt_blocks;
8121         struct btrfs_path path;
8122         struct btrfs_key key;
8123         struct btrfs_key found_key;
8124         int ret, err = 0;
8125         struct block_info *bits;
8126         int bits_nr;
8127         struct extent_buffer *leaf;
8128         int slot;
8129         struct btrfs_root_item ri;
8130         struct list_head dropping_trees;
8131         struct list_head normal_trees;
8132         struct btrfs_root *root1;
8133         u64 objectid;
8134         u32 level_size;
8135         u8 level;
8136
8137         dev_cache = RB_ROOT;
8138         cache_tree_init(&chunk_cache);
8139         block_group_tree_init(&block_group_cache);
8140         device_extent_tree_init(&dev_extent_cache);
8141
8142         cache_tree_init(&extent_cache);
8143         cache_tree_init(&seen);
8144         cache_tree_init(&pending);
8145         cache_tree_init(&nodes);
8146         cache_tree_init(&reada);
8147         cache_tree_init(&corrupt_blocks);
8148         extent_io_tree_init(&excluded_extents);
8149         INIT_LIST_HEAD(&dropping_trees);
8150         INIT_LIST_HEAD(&normal_trees);
8151
8152         if (repair) {
8153                 root->fs_info->excluded_extents = &excluded_extents;
8154                 root->fs_info->fsck_extent_cache = &extent_cache;
8155                 root->fs_info->free_extent_hook = free_extent_hook;
8156                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8157         }
8158
8159         bits_nr = 1024;
8160         bits = malloc(bits_nr * sizeof(struct block_info));
8161         if (!bits) {
8162                 perror("malloc");
8163                 exit(1);
8164         }
8165
8166         if (ctx.progress_enabled) {
8167                 ctx.tp = TASK_EXTENTS;
8168                 task_start(ctx.info);
8169         }
8170
8171 again:
8172         root1 = root->fs_info->tree_root;
8173         level = btrfs_header_level(root1->node);
8174         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8175                                     root1->node->start, 0, level, 0,
8176                                     btrfs_level_size(root1, level), NULL);
8177         if (ret < 0)
8178                 goto out;
8179         root1 = root->fs_info->chunk_root;
8180         level = btrfs_header_level(root1->node);
8181         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8182                                     root1->node->start, 0, level, 0,
8183                                     btrfs_level_size(root1, level), NULL);
8184         if (ret < 0)
8185                 goto out;
8186         btrfs_init_path(&path);
8187         key.offset = 0;
8188         key.objectid = 0;
8189         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8190         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8191                                         &key, &path, 0, 0);
8192         if (ret < 0)
8193                 goto out;
8194         while(1) {
8195                 leaf = path.nodes[0];
8196                 slot = path.slots[0];
8197                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8198                         ret = btrfs_next_leaf(root, &path);
8199                         if (ret != 0)
8200                                 break;
8201                         leaf = path.nodes[0];
8202                         slot = path.slots[0];
8203                 }
8204                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8205                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8206                         unsigned long offset;
8207                         u64 last_snapshot;
8208
8209                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8210                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8211                         last_snapshot = btrfs_root_last_snapshot(&ri);
8212                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8213                                 level = btrfs_root_level(&ri);
8214                                 level_size = btrfs_level_size(root, level);
8215                                 ret = add_root_item_to_list(&normal_trees,
8216                                                 found_key.objectid,
8217                                                 btrfs_root_bytenr(&ri),
8218                                                 last_snapshot, level,
8219                                                 0, level_size, NULL);
8220                                 if (ret < 0)
8221                                         goto out;
8222                         } else {
8223                                 level = btrfs_root_level(&ri);
8224                                 level_size = btrfs_level_size(root, level);
8225                                 objectid = found_key.objectid;
8226                                 btrfs_disk_key_to_cpu(&found_key,
8227                                                       &ri.drop_progress);
8228                                 ret = add_root_item_to_list(&dropping_trees,
8229                                                 objectid,
8230                                                 btrfs_root_bytenr(&ri),
8231                                                 last_snapshot, level,
8232                                                 ri.drop_level,
8233                                                 level_size, &found_key);
8234                                 if (ret < 0)
8235                                         goto out;
8236                         }
8237                 }
8238                 path.slots[0]++;
8239         }
8240         btrfs_release_path(&path);
8241
8242         /*
8243          * check_block can return -EAGAIN if it fixes something, please keep
8244          * this in mind when dealing with return values from these functions, if
8245          * we get -EAGAIN we want to fall through and restart the loop.
8246          */
8247         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8248                                   &seen, &reada, &nodes, &extent_cache,
8249                                   &chunk_cache, &dev_cache, &block_group_cache,
8250                                   &dev_extent_cache);
8251         if (ret < 0) {
8252                 if (ret == -EAGAIN)
8253                         goto loop;
8254                 goto out;
8255         }
8256         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8257                                   &pending, &seen, &reada, &nodes,
8258                                   &extent_cache, &chunk_cache, &dev_cache,
8259                                   &block_group_cache, &dev_extent_cache);
8260         if (ret < 0) {
8261                 if (ret == -EAGAIN)
8262                         goto loop;
8263                 goto out;
8264         }
8265
8266         ret = check_chunks(&chunk_cache, &block_group_cache,
8267                            &dev_extent_cache, NULL, NULL, NULL, 0);
8268         if (ret) {
8269                 if (ret == -EAGAIN)
8270                         goto loop;
8271                 err = ret;
8272         }
8273
8274         ret = check_extent_refs(root, &extent_cache);
8275         if (ret < 0) {
8276                 if (ret == -EAGAIN)
8277                         goto loop;
8278                 goto out;
8279         }
8280
8281         ret = check_devices(&dev_cache, &dev_extent_cache);
8282         if (ret && err)
8283                 ret = err;
8284
8285 out:
8286         task_stop(ctx.info);
8287         if (repair) {
8288                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8289                 extent_io_tree_cleanup(&excluded_extents);
8290                 root->fs_info->fsck_extent_cache = NULL;
8291                 root->fs_info->free_extent_hook = NULL;
8292                 root->fs_info->corrupt_blocks = NULL;
8293                 root->fs_info->excluded_extents = NULL;
8294         }
8295         free(bits);
8296         free_chunk_cache_tree(&chunk_cache);
8297         free_device_cache_tree(&dev_cache);
8298         free_block_group_tree(&block_group_cache);
8299         free_device_extent_tree(&dev_extent_cache);
8300         free_extent_cache_tree(&seen);
8301         free_extent_cache_tree(&pending);
8302         free_extent_cache_tree(&reada);
8303         free_extent_cache_tree(&nodes);
8304         return ret;
8305 loop:
8306         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8307         free_extent_cache_tree(&seen);
8308         free_extent_cache_tree(&pending);
8309         free_extent_cache_tree(&reada);
8310         free_extent_cache_tree(&nodes);
8311         free_chunk_cache_tree(&chunk_cache);
8312         free_block_group_tree(&block_group_cache);
8313         free_device_cache_tree(&dev_cache);
8314         free_device_extent_tree(&dev_extent_cache);
8315         free_extent_record_cache(root->fs_info, &extent_cache);
8316         free_root_item_list(&normal_trees);
8317         free_root_item_list(&dropping_trees);
8318         extent_io_tree_cleanup(&excluded_extents);
8319         goto again;
8320 }
8321
8322 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8323                            struct btrfs_root *root, int overwrite)
8324 {
8325         struct extent_buffer *c;
8326         struct extent_buffer *old = root->node;
8327         int level;
8328         int ret;
8329         struct btrfs_disk_key disk_key = {0,0,0};
8330
8331         level = 0;
8332
8333         if (overwrite) {
8334                 c = old;
8335                 extent_buffer_get(c);
8336                 goto init;
8337         }
8338         c = btrfs_alloc_free_block(trans, root,
8339                                    btrfs_level_size(root, 0),
8340                                    root->root_key.objectid,
8341                                    &disk_key, level, 0, 0);
8342         if (IS_ERR(c)) {
8343                 c = old;
8344                 extent_buffer_get(c);
8345                 overwrite = 1;
8346         }
8347 init:
8348         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8349         btrfs_set_header_level(c, level);
8350         btrfs_set_header_bytenr(c, c->start);
8351         btrfs_set_header_generation(c, trans->transid);
8352         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8353         btrfs_set_header_owner(c, root->root_key.objectid);
8354
8355         write_extent_buffer(c, root->fs_info->fsid,
8356                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8357
8358         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8359                             btrfs_header_chunk_tree_uuid(c),
8360                             BTRFS_UUID_SIZE);
8361
8362         btrfs_mark_buffer_dirty(c);
8363         /*
8364          * this case can happen in the following case:
8365          *
8366          * 1.overwrite previous root.
8367          *
8368          * 2.reinit reloc data root, this is because we skip pin
8369          * down reloc data tree before which means we can allocate
8370          * same block bytenr here.
8371          */
8372         if (old->start == c->start) {
8373                 btrfs_set_root_generation(&root->root_item,
8374                                           trans->transid);
8375                 root->root_item.level = btrfs_header_level(root->node);
8376                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8377                                         &root->root_key, &root->root_item);
8378                 if (ret) {
8379                         free_extent_buffer(c);
8380                         return ret;
8381                 }
8382         }
8383         free_extent_buffer(old);
8384         root->node = c;
8385         add_root_to_dirty_list(root);
8386         return 0;
8387 }
8388
8389 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8390                                 struct extent_buffer *eb, int tree_root)
8391 {
8392         struct extent_buffer *tmp;
8393         struct btrfs_root_item *ri;
8394         struct btrfs_key key;
8395         u64 bytenr;
8396         u32 leafsize;
8397         int level = btrfs_header_level(eb);
8398         int nritems;
8399         int ret;
8400         int i;
8401
8402         /*
8403          * If we have pinned this block before, don't pin it again.
8404          * This can not only avoid forever loop with broken filesystem
8405          * but also give us some speedups.
8406          */
8407         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8408                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8409                 return 0;
8410
8411         btrfs_pin_extent(fs_info, eb->start, eb->len);
8412
8413         leafsize = btrfs_super_leafsize(fs_info->super_copy);
8414         nritems = btrfs_header_nritems(eb);
8415         for (i = 0; i < nritems; i++) {
8416                 if (level == 0) {
8417                         btrfs_item_key_to_cpu(eb, &key, i);
8418                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8419                                 continue;
8420                         /* Skip the extent root and reloc roots */
8421                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8422                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8423                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8424                                 continue;
8425                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8426                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8427
8428                         /*
8429                          * If at any point we start needing the real root we
8430                          * will have to build a stump root for the root we are
8431                          * in, but for now this doesn't actually use the root so
8432                          * just pass in extent_root.
8433                          */
8434                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8435                                               leafsize, 0);
8436                         if (!extent_buffer_uptodate(tmp)) {
8437                                 fprintf(stderr, "Error reading root block\n");
8438                                 return -EIO;
8439                         }
8440                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8441                         free_extent_buffer(tmp);
8442                         if (ret)
8443                                 return ret;
8444                 } else {
8445                         bytenr = btrfs_node_blockptr(eb, i);
8446
8447                         /* If we aren't the tree root don't read the block */
8448                         if (level == 1 && !tree_root) {
8449                                 btrfs_pin_extent(fs_info, bytenr, leafsize);
8450                                 continue;
8451                         }
8452
8453                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8454                                               leafsize, 0);
8455                         if (!extent_buffer_uptodate(tmp)) {
8456                                 fprintf(stderr, "Error reading tree block\n");
8457                                 return -EIO;
8458                         }
8459                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8460                         free_extent_buffer(tmp);
8461                         if (ret)
8462                                 return ret;
8463                 }
8464         }
8465
8466         return 0;
8467 }
8468
8469 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8470 {
8471         int ret;
8472
8473         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8474         if (ret)
8475                 return ret;
8476
8477         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8478 }
8479
8480 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8481 {
8482         struct btrfs_block_group_cache *cache;
8483         struct btrfs_path *path;
8484         struct extent_buffer *leaf;
8485         struct btrfs_chunk *chunk;
8486         struct btrfs_key key;
8487         int ret;
8488         u64 start;
8489
8490         path = btrfs_alloc_path();
8491         if (!path)
8492                 return -ENOMEM;
8493
8494         key.objectid = 0;
8495         key.type = BTRFS_CHUNK_ITEM_KEY;
8496         key.offset = 0;
8497
8498         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8499         if (ret < 0) {
8500                 btrfs_free_path(path);
8501                 return ret;
8502         }
8503
8504         /*
8505          * We do this in case the block groups were screwed up and had alloc
8506          * bits that aren't actually set on the chunks.  This happens with
8507          * restored images every time and could happen in real life I guess.
8508          */
8509         fs_info->avail_data_alloc_bits = 0;
8510         fs_info->avail_metadata_alloc_bits = 0;
8511         fs_info->avail_system_alloc_bits = 0;
8512
8513         /* First we need to create the in-memory block groups */
8514         while (1) {
8515                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8516                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
8517                         if (ret < 0) {
8518                                 btrfs_free_path(path);
8519                                 return ret;
8520                         }
8521                         if (ret) {
8522                                 ret = 0;
8523                                 break;
8524                         }
8525                 }
8526                 leaf = path->nodes[0];
8527                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8528                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8529                         path->slots[0]++;
8530                         continue;
8531                 }
8532
8533                 chunk = btrfs_item_ptr(leaf, path->slots[0],
8534                                        struct btrfs_chunk);
8535                 btrfs_add_block_group(fs_info, 0,
8536                                       btrfs_chunk_type(leaf, chunk),
8537                                       key.objectid, key.offset,
8538                                       btrfs_chunk_length(leaf, chunk));
8539                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8540                                  key.offset + btrfs_chunk_length(leaf, chunk),
8541                                  GFP_NOFS);
8542                 path->slots[0]++;
8543         }
8544         start = 0;
8545         while (1) {
8546                 cache = btrfs_lookup_first_block_group(fs_info, start);
8547                 if (!cache)
8548                         break;
8549                 cache->cached = 1;
8550                 start = cache->key.objectid + cache->key.offset;
8551         }
8552
8553         btrfs_free_path(path);
8554         return 0;
8555 }
8556
8557 static int reset_balance(struct btrfs_trans_handle *trans,
8558                          struct btrfs_fs_info *fs_info)
8559 {
8560         struct btrfs_root *root = fs_info->tree_root;
8561         struct btrfs_path *path;
8562         struct extent_buffer *leaf;
8563         struct btrfs_key key;
8564         int del_slot, del_nr = 0;
8565         int ret;
8566         int found = 0;
8567
8568         path = btrfs_alloc_path();
8569         if (!path)
8570                 return -ENOMEM;
8571
8572         key.objectid = BTRFS_BALANCE_OBJECTID;
8573         key.type = BTRFS_BALANCE_ITEM_KEY;
8574         key.offset = 0;
8575
8576         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8577         if (ret) {
8578                 if (ret > 0)
8579                         ret = 0;
8580                 if (!ret)
8581                         goto reinit_data_reloc;
8582                 else
8583                         goto out;
8584         }
8585
8586         ret = btrfs_del_item(trans, root, path);
8587         if (ret)
8588                 goto out;
8589         btrfs_release_path(path);
8590
8591         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8592         key.type = BTRFS_ROOT_ITEM_KEY;
8593         key.offset = 0;
8594
8595         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8596         if (ret < 0)
8597                 goto out;
8598         while (1) {
8599                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8600                         if (!found)
8601                                 break;
8602
8603                         if (del_nr) {
8604                                 ret = btrfs_del_items(trans, root, path,
8605                                                       del_slot, del_nr);
8606                                 del_nr = 0;
8607                                 if (ret)
8608                                         goto out;
8609                         }
8610                         key.offset++;
8611                         btrfs_release_path(path);
8612
8613                         found = 0;
8614                         ret = btrfs_search_slot(trans, root, &key, path,
8615                                                 -1, 1);
8616                         if (ret < 0)
8617                                 goto out;
8618                         continue;
8619                 }
8620                 found = 1;
8621                 leaf = path->nodes[0];
8622                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8623                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8624                         break;
8625                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8626                         path->slots[0]++;
8627                         continue;
8628                 }
8629                 if (!del_nr) {
8630                         del_slot = path->slots[0];
8631                         del_nr = 1;
8632                 } else {
8633                         del_nr++;
8634                 }
8635                 path->slots[0]++;
8636         }
8637
8638         if (del_nr) {
8639                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8640                 if (ret)
8641                         goto out;
8642         }
8643         btrfs_release_path(path);
8644
8645 reinit_data_reloc:
8646         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8647         key.type = BTRFS_ROOT_ITEM_KEY;
8648         key.offset = (u64)-1;
8649         root = btrfs_read_fs_root(fs_info, &key);
8650         if (IS_ERR(root)) {
8651                 fprintf(stderr, "Error reading data reloc tree\n");
8652                 ret = PTR_ERR(root);
8653                 goto out;
8654         }
8655         record_root_in_trans(trans, root);
8656         ret = btrfs_fsck_reinit_root(trans, root, 0);
8657         if (ret)
8658                 goto out;
8659         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8660 out:
8661         btrfs_free_path(path);
8662         return ret;
8663 }
8664
8665 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8666                               struct btrfs_fs_info *fs_info)
8667 {
8668         u64 start = 0;
8669         int ret;
8670
8671         /*
8672          * The only reason we don't do this is because right now we're just
8673          * walking the trees we find and pinning down their bytes, we don't look
8674          * at any of the leaves.  In order to do mixed groups we'd have to check
8675          * the leaves of any fs roots and pin down the bytes for any file
8676          * extents we find.  Not hard but why do it if we don't have to?
8677          */
8678         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8679                 fprintf(stderr, "We don't support re-initing the extent tree "
8680                         "for mixed block groups yet, please notify a btrfs "
8681                         "developer you want to do this so they can add this "
8682                         "functionality.\n");
8683                 return -EINVAL;
8684         }
8685
8686         /*
8687          * first we need to walk all of the trees except the extent tree and pin
8688          * down the bytes that are in use so we don't overwrite any existing
8689          * metadata.
8690          */
8691         ret = pin_metadata_blocks(fs_info);
8692         if (ret) {
8693                 fprintf(stderr, "error pinning down used bytes\n");
8694                 return ret;
8695         }
8696
8697         /*
8698          * Need to drop all the block groups since we're going to recreate all
8699          * of them again.
8700          */
8701         btrfs_free_block_groups(fs_info);
8702         ret = reset_block_groups(fs_info);
8703         if (ret) {
8704                 fprintf(stderr, "error resetting the block groups\n");
8705                 return ret;
8706         }
8707
8708         /* Ok we can allocate now, reinit the extent root */
8709         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8710         if (ret) {
8711                 fprintf(stderr, "extent root initialization failed\n");
8712                 /*
8713                  * When the transaction code is updated we should end the
8714                  * transaction, but for now progs only knows about commit so
8715                  * just return an error.
8716                  */
8717                 return ret;
8718         }
8719
8720         /*
8721          * Now we have all the in-memory block groups setup so we can make
8722          * allocations properly, and the metadata we care about is safe since we
8723          * pinned all of it above.
8724          */
8725         while (1) {
8726                 struct btrfs_block_group_cache *cache;
8727
8728                 cache = btrfs_lookup_first_block_group(fs_info, start);
8729                 if (!cache)
8730                         break;
8731                 start = cache->key.objectid + cache->key.offset;
8732                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8733                                         &cache->key, &cache->item,
8734                                         sizeof(cache->item));
8735                 if (ret) {
8736                         fprintf(stderr, "Error adding block group\n");
8737                         return ret;
8738                 }
8739                 btrfs_extent_post_op(trans, fs_info->extent_root);
8740         }
8741
8742         ret = reset_balance(trans, fs_info);
8743         if (ret)
8744                 fprintf(stderr, "error reseting the pending balance\n");
8745
8746         return ret;
8747 }
8748
8749 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8750 {
8751         struct btrfs_path *path;
8752         struct btrfs_trans_handle *trans;
8753         struct btrfs_key key;
8754         int ret;
8755
8756         printf("Recowing metadata block %llu\n", eb->start);
8757         key.objectid = btrfs_header_owner(eb);
8758         key.type = BTRFS_ROOT_ITEM_KEY;
8759         key.offset = (u64)-1;
8760
8761         root = btrfs_read_fs_root(root->fs_info, &key);
8762         if (IS_ERR(root)) {
8763                 fprintf(stderr, "Couldn't find owner root %llu\n",
8764                         key.objectid);
8765                 return PTR_ERR(root);
8766         }
8767
8768         path = btrfs_alloc_path();
8769         if (!path)
8770                 return -ENOMEM;
8771
8772         trans = btrfs_start_transaction(root, 1);
8773         if (IS_ERR(trans)) {
8774                 btrfs_free_path(path);
8775                 return PTR_ERR(trans);
8776         }
8777
8778         path->lowest_level = btrfs_header_level(eb);
8779         if (path->lowest_level)
8780                 btrfs_node_key_to_cpu(eb, &key, 0);
8781         else
8782                 btrfs_item_key_to_cpu(eb, &key, 0);
8783
8784         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8785         btrfs_commit_transaction(trans, root);
8786         btrfs_free_path(path);
8787         return ret;
8788 }
8789
8790 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8791 {
8792         struct btrfs_path *path;
8793         struct btrfs_trans_handle *trans;
8794         struct btrfs_key key;
8795         int ret;
8796
8797         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8798                bad->key.type, bad->key.offset);
8799         key.objectid = bad->root_id;
8800         key.type = BTRFS_ROOT_ITEM_KEY;
8801         key.offset = (u64)-1;
8802
8803         root = btrfs_read_fs_root(root->fs_info, &key);
8804         if (IS_ERR(root)) {
8805                 fprintf(stderr, "Couldn't find owner root %llu\n",
8806                         key.objectid);
8807                 return PTR_ERR(root);
8808         }
8809
8810         path = btrfs_alloc_path();
8811         if (!path)
8812                 return -ENOMEM;
8813
8814         trans = btrfs_start_transaction(root, 1);
8815         if (IS_ERR(trans)) {
8816                 btrfs_free_path(path);
8817                 return PTR_ERR(trans);
8818         }
8819
8820         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8821         if (ret) {
8822                 if (ret > 0)
8823                         ret = 0;
8824                 goto out;
8825         }
8826         ret = btrfs_del_item(trans, root, path);
8827 out:
8828         btrfs_commit_transaction(trans, root);
8829         btrfs_free_path(path);
8830         return ret;
8831 }
8832
8833 static int zero_log_tree(struct btrfs_root *root)
8834 {
8835         struct btrfs_trans_handle *trans;
8836         int ret;
8837
8838         trans = btrfs_start_transaction(root, 1);
8839         if (IS_ERR(trans)) {
8840                 ret = PTR_ERR(trans);
8841                 return ret;
8842         }
8843         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8844         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8845         ret = btrfs_commit_transaction(trans, root);
8846         return ret;
8847 }
8848
8849 static int populate_csum(struct btrfs_trans_handle *trans,
8850                          struct btrfs_root *csum_root, char *buf, u64 start,
8851                          u64 len)
8852 {
8853         u64 offset = 0;
8854         u64 sectorsize;
8855         int ret = 0;
8856
8857         while (offset < len) {
8858                 sectorsize = csum_root->sectorsize;
8859                 ret = read_extent_data(csum_root, buf, start + offset,
8860                                        &sectorsize, 0);
8861                 if (ret)
8862                         break;
8863                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8864                                             start + offset, buf, sectorsize);
8865                 if (ret)
8866                         break;
8867                 offset += sectorsize;
8868         }
8869         return ret;
8870 }
8871
8872 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8873                                       struct btrfs_root *csum_root,
8874                                       struct btrfs_root *cur_root)
8875 {
8876         struct btrfs_path *path;
8877         struct btrfs_key key;
8878         struct extent_buffer *node;
8879         struct btrfs_file_extent_item *fi;
8880         char *buf = NULL;
8881         u64 start = 0;
8882         u64 len = 0;
8883         int slot = 0;
8884         int ret = 0;
8885
8886         path = btrfs_alloc_path();
8887         if (!path)
8888                 return -ENOMEM;
8889         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8890         if (!buf) {
8891                 ret = -ENOMEM;
8892                 goto out;
8893         }
8894
8895         key.objectid = 0;
8896         key.offset = 0;
8897         key.type = 0;
8898
8899         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8900         if (ret < 0)
8901                 goto out;
8902         /* Iterate all regular file extents and fill its csum */
8903         while (1) {
8904                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8905
8906                 if (key.type != BTRFS_EXTENT_DATA_KEY)
8907                         goto next;
8908                 node = path->nodes[0];
8909                 slot = path->slots[0];
8910                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8911                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8912                         goto next;
8913                 start = btrfs_file_extent_disk_bytenr(node, fi);
8914                 len = btrfs_file_extent_disk_num_bytes(node, fi);
8915
8916                 ret = populate_csum(trans, csum_root, buf, start, len);
8917                 if (ret == -EEXIST)
8918                         ret = 0;
8919                 if (ret < 0)
8920                         goto out;
8921 next:
8922                 /*
8923                  * TODO: if next leaf is corrupted, jump to nearest next valid
8924                  * leaf.
8925                  */
8926                 ret = btrfs_next_item(cur_root, path);
8927                 if (ret < 0)
8928                         goto out;
8929                 if (ret > 0) {
8930                         ret = 0;
8931                         goto out;
8932                 }
8933         }
8934
8935 out:
8936         btrfs_free_path(path);
8937         free(buf);
8938         return ret;
8939 }
8940
8941 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8942                                   struct btrfs_root *csum_root)
8943 {
8944         struct btrfs_fs_info *fs_info = csum_root->fs_info;
8945         struct btrfs_path *path;
8946         struct btrfs_root *tree_root = fs_info->tree_root;
8947         struct btrfs_root *cur_root;
8948         struct extent_buffer *node;
8949         struct btrfs_key key;
8950         int slot = 0;
8951         int ret = 0;
8952
8953         path = btrfs_alloc_path();
8954         if (!path)
8955                 return -ENOMEM;
8956
8957         key.objectid = BTRFS_FS_TREE_OBJECTID;
8958         key.offset = 0;
8959         key.type = BTRFS_ROOT_ITEM_KEY;
8960
8961         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
8962         if (ret < 0)
8963                 goto out;
8964         if (ret > 0) {
8965                 ret = -ENOENT;
8966                 goto out;
8967         }
8968
8969         while (1) {
8970                 node = path->nodes[0];
8971                 slot = path->slots[0];
8972                 btrfs_item_key_to_cpu(node, &key, slot);
8973                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8974                         goto out;
8975                 if (key.type != BTRFS_ROOT_ITEM_KEY)
8976                         goto next;
8977                 if (!is_fstree(key.objectid))
8978                         goto next;
8979                 key.offset = (u64)-1;
8980
8981                 cur_root = btrfs_read_fs_root(fs_info, &key);
8982                 if (IS_ERR(cur_root) || !cur_root) {
8983                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8984                                 key.objectid);
8985                         goto out;
8986                 }
8987                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8988                                 cur_root);
8989                 if (ret < 0)
8990                         goto out;
8991 next:
8992                 ret = btrfs_next_item(tree_root, path);
8993                 if (ret > 0) {
8994                         ret = 0;
8995                         goto out;
8996                 }
8997                 if (ret < 0)
8998                         goto out;
8999         }
9000
9001 out:
9002         btrfs_free_path(path);
9003         return ret;
9004 }
9005
9006 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
9007                                       struct btrfs_root *csum_root)
9008 {
9009         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
9010         struct btrfs_path *path;
9011         struct btrfs_extent_item *ei;
9012         struct extent_buffer *leaf;
9013         char *buf;
9014         struct btrfs_key key;
9015         int ret;
9016
9017         path = btrfs_alloc_path();
9018         if (!path)
9019                 return -ENOMEM;
9020
9021         key.objectid = 0;
9022         key.type = BTRFS_EXTENT_ITEM_KEY;
9023         key.offset = 0;
9024
9025         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9026         if (ret < 0) {
9027                 btrfs_free_path(path);
9028                 return ret;
9029         }
9030
9031         buf = malloc(csum_root->sectorsize);
9032         if (!buf) {
9033                 btrfs_free_path(path);
9034                 return -ENOMEM;
9035         }
9036
9037         while (1) {
9038                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9039                         ret = btrfs_next_leaf(extent_root, path);
9040                         if (ret < 0)
9041                                 break;
9042                         if (ret) {
9043                                 ret = 0;
9044                                 break;
9045                         }
9046                 }
9047                 leaf = path->nodes[0];
9048
9049                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9050                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9051                         path->slots[0]++;
9052                         continue;
9053                 }
9054
9055                 ei = btrfs_item_ptr(leaf, path->slots[0],
9056                                     struct btrfs_extent_item);
9057                 if (!(btrfs_extent_flags(leaf, ei) &
9058                       BTRFS_EXTENT_FLAG_DATA)) {
9059                         path->slots[0]++;
9060                         continue;
9061                 }
9062
9063                 ret = populate_csum(trans, csum_root, buf, key.objectid,
9064                                     key.offset);
9065                 if (ret)
9066                         break;
9067                 path->slots[0]++;
9068         }
9069
9070         btrfs_free_path(path);
9071         free(buf);
9072         return ret;
9073 }
9074
9075 /*
9076  * Recalculate the csum and put it into the csum tree.
9077  *
9078  * Extent tree init will wipe out all the extent info, so in that case, we
9079  * can't depend on extent tree, but use fs tree.  If search_fs_tree is set, we
9080  * will use fs/subvol trees to init the csum tree.
9081  */
9082 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9083                           struct btrfs_root *csum_root,
9084                           int search_fs_tree)
9085 {
9086         if (search_fs_tree)
9087                 return fill_csum_tree_from_fs(trans, csum_root);
9088         else
9089                 return fill_csum_tree_from_extent(trans, csum_root);
9090 }
9091
9092 struct root_item_info {
9093         /* level of the root */
9094         u8 level;
9095         /* number of nodes at this level, must be 1 for a root */
9096         int node_count;
9097         u64 bytenr;
9098         u64 gen;
9099         struct cache_extent cache_extent;
9100 };
9101
9102 static struct cache_tree *roots_info_cache = NULL;
9103
9104 static void free_roots_info_cache(void)
9105 {
9106         if (!roots_info_cache)
9107                 return;
9108
9109         while (!cache_tree_empty(roots_info_cache)) {
9110                 struct cache_extent *entry;
9111                 struct root_item_info *rii;
9112
9113                 entry = first_cache_extent(roots_info_cache);
9114                 if (!entry)
9115                         break;
9116                 remove_cache_extent(roots_info_cache, entry);
9117                 rii = container_of(entry, struct root_item_info, cache_extent);
9118                 free(rii);
9119         }
9120
9121         free(roots_info_cache);
9122         roots_info_cache = NULL;
9123 }
9124
9125 static int build_roots_info_cache(struct btrfs_fs_info *info)
9126 {
9127         int ret = 0;
9128         struct btrfs_key key;
9129         struct extent_buffer *leaf;
9130         struct btrfs_path *path;
9131
9132         if (!roots_info_cache) {
9133                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9134                 if (!roots_info_cache)
9135                         return -ENOMEM;
9136                 cache_tree_init(roots_info_cache);
9137         }
9138
9139         path = btrfs_alloc_path();
9140         if (!path)
9141                 return -ENOMEM;
9142
9143         key.objectid = 0;
9144         key.type = BTRFS_EXTENT_ITEM_KEY;
9145         key.offset = 0;
9146
9147         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9148         if (ret < 0)
9149                 goto out;
9150         leaf = path->nodes[0];
9151
9152         while (1) {
9153                 struct btrfs_key found_key;
9154                 struct btrfs_extent_item *ei;
9155                 struct btrfs_extent_inline_ref *iref;
9156                 int slot = path->slots[0];
9157                 int type;
9158                 u64 flags;
9159                 u64 root_id;
9160                 u8 level;
9161                 struct cache_extent *entry;
9162                 struct root_item_info *rii;
9163
9164                 if (slot >= btrfs_header_nritems(leaf)) {
9165                         ret = btrfs_next_leaf(info->extent_root, path);
9166                         if (ret < 0) {
9167                                 break;
9168                         } else if (ret) {
9169                                 ret = 0;
9170                                 break;
9171                         }
9172                         leaf = path->nodes[0];
9173                         slot = path->slots[0];
9174                 }
9175
9176                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9177
9178                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9179                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9180                         goto next;
9181
9182                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9183                 flags = btrfs_extent_flags(leaf, ei);
9184
9185                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9186                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9187                         goto next;
9188
9189                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9190                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9191                         level = found_key.offset;
9192                 } else {
9193                         struct btrfs_tree_block_info *info;
9194
9195                         info = (struct btrfs_tree_block_info *)(ei + 1);
9196                         iref = (struct btrfs_extent_inline_ref *)(info + 1);
9197                         level = btrfs_tree_block_level(leaf, info);
9198                 }
9199
9200                 /*
9201                  * For a root extent, it must be of the following type and the
9202                  * first (and only one) iref in the item.
9203                  */
9204                 type = btrfs_extent_inline_ref_type(leaf, iref);
9205                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9206                         goto next;
9207
9208                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9209                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9210                 if (!entry) {
9211                         rii = malloc(sizeof(struct root_item_info));
9212                         if (!rii) {
9213                                 ret = -ENOMEM;
9214                                 goto out;
9215                         }
9216                         rii->cache_extent.start = root_id;
9217                         rii->cache_extent.size = 1;
9218                         rii->level = (u8)-1;
9219                         entry = &rii->cache_extent;
9220                         ret = insert_cache_extent(roots_info_cache, entry);
9221                         ASSERT(ret == 0);
9222                 } else {
9223                         rii = container_of(entry, struct root_item_info,
9224                                            cache_extent);
9225                 }
9226
9227                 ASSERT(rii->cache_extent.start == root_id);
9228                 ASSERT(rii->cache_extent.size == 1);
9229
9230                 if (level > rii->level || rii->level == (u8)-1) {
9231                         rii->level = level;
9232                         rii->bytenr = found_key.objectid;
9233                         rii->gen = btrfs_extent_generation(leaf, ei);
9234                         rii->node_count = 1;
9235                 } else if (level == rii->level) {
9236                         rii->node_count++;
9237                 }
9238 next:
9239                 path->slots[0]++;
9240         }
9241
9242 out:
9243         btrfs_free_path(path);
9244
9245         return ret;
9246 }
9247
9248 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9249                                   struct btrfs_path *path,
9250                                   const struct btrfs_key *root_key,
9251                                   const int read_only_mode)
9252 {
9253         const u64 root_id = root_key->objectid;
9254         struct cache_extent *entry;
9255         struct root_item_info *rii;
9256         struct btrfs_root_item ri;
9257         unsigned long offset;
9258
9259         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9260         if (!entry) {
9261                 fprintf(stderr,
9262                         "Error: could not find extent items for root %llu\n",
9263                         root_key->objectid);
9264                 return -ENOENT;
9265         }
9266
9267         rii = container_of(entry, struct root_item_info, cache_extent);
9268         ASSERT(rii->cache_extent.start == root_id);
9269         ASSERT(rii->cache_extent.size == 1);
9270
9271         if (rii->node_count != 1) {
9272                 fprintf(stderr,
9273                         "Error: could not find btree root extent for root %llu\n",
9274                         root_id);
9275                 return -ENOENT;
9276         }
9277
9278         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9279         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9280
9281         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9282             btrfs_root_level(&ri) != rii->level ||
9283             btrfs_root_generation(&ri) != rii->gen) {
9284
9285                 /*
9286                  * If we're in repair mode but our caller told us to not update
9287                  * the root item, i.e. just check if it needs to be updated, don't
9288                  * print this message, since the caller will call us again shortly
9289                  * for the same root item without read only mode (the caller will
9290                  * open a transaction first).
9291                  */
9292                 if (!(read_only_mode && repair))
9293                         fprintf(stderr,
9294                                 "%sroot item for root %llu,"
9295                                 " current bytenr %llu, current gen %llu, current level %u,"
9296                                 " new bytenr %llu, new gen %llu, new level %u\n",
9297                                 (read_only_mode ? "" : "fixing "),
9298                                 root_id,
9299                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9300                                 btrfs_root_level(&ri),
9301                                 rii->bytenr, rii->gen, rii->level);
9302
9303                 if (btrfs_root_generation(&ri) > rii->gen) {
9304                         fprintf(stderr,
9305                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9306                                 root_id, btrfs_root_generation(&ri), rii->gen);
9307                         return -EINVAL;
9308                 }
9309
9310                 if (!read_only_mode) {
9311                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9312                         btrfs_set_root_level(&ri, rii->level);
9313                         btrfs_set_root_generation(&ri, rii->gen);
9314                         write_extent_buffer(path->nodes[0], &ri,
9315                                             offset, sizeof(ri));
9316                 }
9317
9318                 return 1;
9319         }
9320
9321         return 0;
9322 }
9323
9324 /*
9325  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9326  * caused read-only snapshots to be corrupted if they were created at a moment
9327  * when the source subvolume/snapshot had orphan items. The issue was that the
9328  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9329  * node instead of the post orphan cleanup root node.
9330  * So this function, and its callees, just detects and fixes those cases. Even
9331  * though the regression was for read-only snapshots, this function applies to
9332  * any snapshot/subvolume root.
9333  * This must be run before any other repair code - not doing it so, makes other
9334  * repair code delete or modify backrefs in the extent tree for example, which
9335  * will result in an inconsistent fs after repairing the root items.
9336  */
9337 static int repair_root_items(struct btrfs_fs_info *info)
9338 {
9339         struct btrfs_path *path = NULL;
9340         struct btrfs_key key;
9341         struct extent_buffer *leaf;
9342         struct btrfs_trans_handle *trans = NULL;
9343         int ret = 0;
9344         int bad_roots = 0;
9345         int need_trans = 0;
9346
9347         ret = build_roots_info_cache(info);
9348         if (ret)
9349                 goto out;
9350
9351         path = btrfs_alloc_path();
9352         if (!path) {
9353                 ret = -ENOMEM;
9354                 goto out;
9355         }
9356
9357         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9358         key.type = BTRFS_ROOT_ITEM_KEY;
9359         key.offset = 0;
9360
9361 again:
9362         /*
9363          * Avoid opening and committing transactions if a leaf doesn't have
9364          * any root items that need to be fixed, so that we avoid rotating
9365          * backup roots unnecessarily.
9366          */
9367         if (need_trans) {
9368                 trans = btrfs_start_transaction(info->tree_root, 1);
9369                 if (IS_ERR(trans)) {
9370                         ret = PTR_ERR(trans);
9371                         goto out;
9372                 }
9373         }
9374
9375         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9376                                 0, trans ? 1 : 0);
9377         if (ret < 0)
9378                 goto out;
9379         leaf = path->nodes[0];
9380
9381         while (1) {
9382                 struct btrfs_key found_key;
9383
9384                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9385                         int no_more_keys = find_next_key(path, &key);
9386
9387                         btrfs_release_path(path);
9388                         if (trans) {
9389                                 ret = btrfs_commit_transaction(trans,
9390                                                                info->tree_root);
9391                                 trans = NULL;
9392                                 if (ret < 0)
9393                                         goto out;
9394                         }
9395                         need_trans = 0;
9396                         if (no_more_keys)
9397                                 break;
9398                         goto again;
9399                 }
9400
9401                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9402
9403                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9404                         goto next;
9405                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9406                         goto next;
9407
9408                 ret = maybe_repair_root_item(info, path, &found_key,
9409                                              trans ? 0 : 1);
9410                 if (ret < 0)
9411                         goto out;
9412                 if (ret) {
9413                         if (!trans && repair) {
9414                                 need_trans = 1;
9415                                 key = found_key;
9416                                 btrfs_release_path(path);
9417                                 goto again;
9418                         }
9419                         bad_roots++;
9420                 }
9421 next:
9422                 path->slots[0]++;
9423         }
9424         ret = 0;
9425 out:
9426         free_roots_info_cache();
9427         btrfs_free_path(path);
9428         if (trans)
9429                 btrfs_commit_transaction(trans, info->tree_root);
9430         if (ret < 0)
9431                 return ret;
9432
9433         return bad_roots;
9434 }
9435
9436 const char * const cmd_check_usage[] = {
9437         "btrfs check [options] <device>",
9438         "Check structural inegrity of a filesystem (unmounted).",
9439         "Check structural inegrity of an unmounted filesystem. Verify internal",
9440         "trees' consistency and item connectivity. In the repair mode try to",
9441         "fix the problems found.",
9442         "WARNING: the repair mode is considered dangerous",
9443         "",
9444         "-s|--super <superblock>     use this superblock copy",
9445         "-b|--backup                 use the backup root copy",
9446         "--repair                    try to repair the filesystem",
9447         "--readonly                  run in read-only mode (default)",
9448         "--init-csum-tree            create a new CRC tree",
9449         "--init-extent-tree          create a new extent tree",
9450         "--check-data-csum           verify checkums of data blocks",
9451         "-Q|--qgroup-report           print a report on qgroup consistency",
9452         "-E|--subvol-extents <subvolid>",
9453         "                            print subvolume extents and sharing state",
9454         "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
9455         "-p|--progress               indicate progress",
9456         NULL
9457 };
9458
9459 int cmd_check(int argc, char **argv)
9460 {
9461         struct cache_tree root_cache;
9462         struct btrfs_root *root;
9463         struct btrfs_fs_info *info;
9464         u64 bytenr = 0;
9465         u64 subvolid = 0;
9466         u64 tree_root_bytenr = 0;
9467         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9468         int ret;
9469         u64 num;
9470         int init_csum_tree = 0;
9471         int readonly = 0;
9472         int qgroup_report = 0;
9473         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9474
9475         while(1) {
9476                 int c;
9477                 enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT,
9478                         OPT_CHECK_CSUM, OPT_READONLY };
9479                 static const struct option long_options[] = {
9480                         { "super", required_argument, NULL, 's' },
9481                         { "repair", no_argument, NULL, OPT_REPAIR },
9482                         { "readonly", no_argument, NULL, OPT_READONLY },
9483                         { "init-csum-tree", no_argument, NULL, OPT_INIT_CSUM },
9484                         { "init-extent-tree", no_argument, NULL, OPT_INIT_EXTENT },
9485                         { "check-data-csum", no_argument, NULL, OPT_CHECK_CSUM },
9486                         { "backup", no_argument, NULL, 'b' },
9487                         { "subvol-extents", required_argument, NULL, 'E' },
9488                         { "qgroup-report", no_argument, NULL, 'Q' },
9489                         { "tree-root", required_argument, NULL, 'r' },
9490                         { "progress", no_argument, NULL, 'p' },
9491                         { NULL, 0, NULL, 0}
9492                 };
9493
9494                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9495                 if (c < 0)
9496                         break;
9497                 switch(c) {
9498                         case 'a': /* ignored */ break;
9499                         case 'b':
9500                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9501                                 break;
9502                         case 's':
9503                                 num = arg_strtou64(optarg);
9504                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9505                                         fprintf(stderr,
9506                                                 "ERROR: super mirror should be less than: %d\n",
9507                                                 BTRFS_SUPER_MIRROR_MAX);
9508                                         exit(1);
9509                                 }
9510                                 bytenr = btrfs_sb_offset(((int)num));
9511                                 printf("using SB copy %llu, bytenr %llu\n", num,
9512                                        (unsigned long long)bytenr);
9513                                 break;
9514                         case 'Q':
9515                                 qgroup_report = 1;
9516                                 break;
9517                         case 'E':
9518                                 subvolid = arg_strtou64(optarg);
9519                                 break;
9520                         case 'r':
9521                                 tree_root_bytenr = arg_strtou64(optarg);
9522                                 break;
9523                         case 'p':
9524                                 ctx.progress_enabled = true;
9525                                 break;
9526                         case '?':
9527                         case 'h':
9528                                 usage(cmd_check_usage);
9529                         case OPT_REPAIR:
9530                                 printf("enabling repair mode\n");
9531                                 repair = 1;
9532                                 ctree_flags |= OPEN_CTREE_WRITES;
9533                                 break;
9534                         case OPT_READONLY:
9535                                 readonly = 1;
9536                                 break;
9537                         case OPT_INIT_CSUM:
9538                                 printf("Creating a new CRC tree\n");
9539                                 init_csum_tree = 1;
9540                                 repair = 1;
9541                                 ctree_flags |= OPEN_CTREE_WRITES;
9542                                 break;
9543                         case OPT_INIT_EXTENT:
9544                                 init_extent_tree = 1;
9545                                 ctree_flags |= (OPEN_CTREE_WRITES |
9546                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9547                                 repair = 1;
9548                                 break;
9549                         case OPT_CHECK_CSUM:
9550                                 check_data_csum = 1;
9551                                 break;
9552                 }
9553         }
9554         argc = argc - optind;
9555
9556         if (check_argc_exact(argc, 1))
9557                 usage(cmd_check_usage);
9558
9559         if (ctx.progress_enabled) {
9560                 ctx.tp = TASK_NOTHING;
9561                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9562         }
9563
9564         /* This check is the only reason for --readonly to exist */
9565         if (readonly && repair) {
9566                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9567                 exit(1);
9568         }
9569
9570         radix_tree_init();
9571         cache_tree_init(&root_cache);
9572
9573         if((ret = check_mounted(argv[optind])) < 0) {
9574                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9575                 goto err_out;
9576         } else if(ret) {
9577                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9578                 ret = -EBUSY;
9579                 goto err_out;
9580         }
9581
9582         /* only allow partial opening under repair mode */
9583         if (repair)
9584                 ctree_flags |= OPEN_CTREE_PARTIAL;
9585
9586         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9587                                   ctree_flags);
9588         if (!info) {
9589                 fprintf(stderr, "Couldn't open file system\n");
9590                 ret = -EIO;
9591                 goto err_out;
9592         }
9593
9594         global_info = info;
9595         root = info->fs_root;
9596
9597         /*
9598          * repair mode will force us to commit transaction which
9599          * will make us fail to load log tree when mounting.
9600          */
9601         if (repair && btrfs_super_log_root(info->super_copy)) {
9602                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9603                 if (!ret) {
9604                         ret = 1;
9605                         goto close_out;
9606                 }
9607                 ret = zero_log_tree(root);
9608                 if (ret) {
9609                         fprintf(stderr, "fail to zero log tree\n");
9610                         goto close_out;
9611                 }
9612         }
9613
9614         uuid_unparse(info->super_copy->fsid, uuidbuf);
9615         if (qgroup_report) {
9616                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9617                        uuidbuf);
9618                 ret = qgroup_verify_all(info);
9619                 if (ret == 0)
9620                         print_qgroup_report(1);
9621                 goto close_out;
9622         }
9623         if (subvolid) {
9624                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9625                        subvolid, argv[optind], uuidbuf);
9626                 ret = print_extent_state(info, subvolid);
9627                 goto close_out;
9628         }
9629         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9630
9631         if (!extent_buffer_uptodate(info->tree_root->node) ||
9632             !extent_buffer_uptodate(info->dev_root->node) ||
9633             !extent_buffer_uptodate(info->chunk_root->node)) {
9634                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9635                 ret = -EIO;
9636                 goto close_out;
9637         }
9638
9639         if (init_extent_tree || init_csum_tree) {
9640                 struct btrfs_trans_handle *trans;
9641
9642                 trans = btrfs_start_transaction(info->extent_root, 0);
9643                 if (IS_ERR(trans)) {
9644                         fprintf(stderr, "Error starting transaction\n");
9645                         ret = PTR_ERR(trans);
9646                         goto close_out;
9647                 }
9648
9649                 if (init_extent_tree) {
9650                         printf("Creating a new extent tree\n");
9651                         ret = reinit_extent_tree(trans, info);
9652                         if (ret)
9653                                 goto close_out;
9654                 }
9655
9656                 if (init_csum_tree) {
9657                         fprintf(stderr, "Reinit crc root\n");
9658                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9659                         if (ret) {
9660                                 fprintf(stderr, "crc root initialization failed\n");
9661                                 ret = -EIO;
9662                                 goto close_out;
9663                         }
9664
9665                         ret = fill_csum_tree(trans, info->csum_root,
9666                                              init_extent_tree);
9667                         if (ret) {
9668                                 fprintf(stderr, "crc refilling failed\n");
9669                                 return -EIO;
9670                         }
9671                 }
9672                 /*
9673                  * Ok now we commit and run the normal fsck, which will add
9674                  * extent entries for all of the items it finds.
9675                  */
9676                 ret = btrfs_commit_transaction(trans, info->extent_root);
9677                 if (ret)
9678                         goto close_out;
9679         }
9680         if (!extent_buffer_uptodate(info->extent_root->node)) {
9681                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9682                 ret = -EIO;
9683                 goto close_out;
9684         }
9685         if (!extent_buffer_uptodate(info->csum_root->node)) {
9686                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9687                 ret = -EIO;
9688                 goto close_out;
9689         }
9690
9691         if (!ctx.progress_enabled)
9692                 fprintf(stderr, "checking extents\n");
9693         ret = check_chunks_and_extents(root);
9694         if (ret)
9695                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9696
9697         ret = repair_root_items(info);
9698         if (ret < 0)
9699                 goto close_out;
9700         if (repair) {
9701                 fprintf(stderr, "Fixed %d roots.\n", ret);
9702                 ret = 0;
9703         } else if (ret > 0) {
9704                 fprintf(stderr,
9705                        "Found %d roots with an outdated root item.\n",
9706                        ret);
9707                 fprintf(stderr,
9708                         "Please run a filesystem check with the option --repair to fix them.\n");
9709                 ret = 1;
9710                 goto close_out;
9711         }
9712
9713         if (!ctx.progress_enabled)
9714                 fprintf(stderr, "checking free space cache\n");
9715         ret = check_space_cache(root);
9716         if (ret)
9717                 goto out;
9718
9719         /*
9720          * We used to have to have these hole extents in between our real
9721          * extents so if we don't have this flag set we need to make sure there
9722          * are no gaps in the file extents for inodes, otherwise we can just
9723          * ignore it when this happens.
9724          */
9725         no_holes = btrfs_fs_incompat(root->fs_info,
9726                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9727         if (!ctx.progress_enabled)
9728                 fprintf(stderr, "checking fs roots\n");
9729         ret = check_fs_roots(root, &root_cache);
9730         if (ret)
9731                 goto out;
9732
9733         fprintf(stderr, "checking csums\n");
9734         ret = check_csums(root);
9735         if (ret)
9736                 goto out;
9737
9738         fprintf(stderr, "checking root refs\n");
9739         ret = check_root_refs(root, &root_cache);
9740         if (ret)
9741                 goto out;
9742
9743         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9744                 struct extent_buffer *eb;
9745
9746                 eb = list_first_entry(&root->fs_info->recow_ebs,
9747                                       struct extent_buffer, recow);
9748                 list_del_init(&eb->recow);
9749                 ret = recow_extent_buffer(root, eb);
9750                 if (ret)
9751                         break;
9752         }
9753
9754         while (!list_empty(&delete_items)) {
9755                 struct bad_item *bad;
9756
9757                 bad = list_first_entry(&delete_items, struct bad_item, list);
9758                 list_del_init(&bad->list);
9759                 if (repair)
9760                         ret = delete_bad_item(root, bad);
9761                 free(bad);
9762         }
9763
9764         if (info->quota_enabled) {
9765                 int err;
9766                 fprintf(stderr, "checking quota groups\n");
9767                 err = qgroup_verify_all(info);
9768                 if (err)
9769                         goto out;
9770         }
9771
9772         if (!list_empty(&root->fs_info->recow_ebs)) {
9773                 fprintf(stderr, "Transid errors in file system\n");
9774                 ret = 1;
9775         }
9776 out:
9777         print_qgroup_report(0);
9778         if (found_old_backref) { /*
9779                  * there was a disk format change when mixed
9780                  * backref was in testing tree. The old format
9781                  * existed about one week.
9782                  */
9783                 printf("\n * Found old mixed backref format. "
9784                        "The old format is not supported! *"
9785                        "\n * Please mount the FS in readonly mode, "
9786                        "backup data and re-format the FS. *\n\n");
9787                 ret = 1;
9788         }
9789         printf("found %llu bytes used err is %d\n",
9790                (unsigned long long)bytes_used, ret);
9791         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9792         printf("total tree bytes: %llu\n",
9793                (unsigned long long)total_btree_bytes);
9794         printf("total fs tree bytes: %llu\n",
9795                (unsigned long long)total_fs_tree_bytes);
9796         printf("total extent tree bytes: %llu\n",
9797                (unsigned long long)total_extent_tree_bytes);
9798         printf("btree space waste bytes: %llu\n",
9799                (unsigned long long)btree_space_waste);
9800         printf("file data blocks allocated: %llu\n referenced %llu\n",
9801                 (unsigned long long)data_bytes_allocated,
9802                 (unsigned long long)data_bytes_referenced);
9803
9804         free_root_recs_tree(&root_cache);
9805 close_out:
9806         close_ctree(root);
9807 err_out:
9808         if (ctx.progress_enabled)
9809                 task_deinit(ctx.info);
9810
9811         return ret;
9812 }