btrfs-progs: handle errors in get_inode_backref and fail in the caller
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "btrfsck.h"
39 #include "qgroup-verify.h"
40 #include "rbtree-utils.h"
41 #include "backref.h"
42 #include "ulist.h"
43
/*
 * Phases the progress indicator can report.  The first three values are
 * used as indices into the label table in print_status_check().
 */
enum task_position {
        TASK_EXTENTS = 0,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        /* Sentinel meaning "no phase"; must remain the last enumerator. */
        TASK_NOTHING,
};
50
51 struct task_ctx {
52         int progress_enabled;
53         enum task_position tp;
54
55         struct task_info *info;
56 };
57
58 static u64 bytes_used = 0;
59 static u64 total_csum_bytes = 0;
60 static u64 total_btree_bytes = 0;
61 static u64 total_fs_tree_bytes = 0;
62 static u64 total_extent_tree_bytes = 0;
63 static u64 btree_space_waste = 0;
64 static u64 data_bytes_allocated = 0;
65 static u64 data_bytes_referenced = 0;
66 static int found_old_backref = 0;
67 static LIST_HEAD(duplicate_extents);
68 static LIST_HEAD(delete_items);
69 static int repair = 0;
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75
76 static void *print_status_check(void *p)
77 {
78         struct task_ctx *priv = p;
79         const char work_indicator[] = { '.', 'o', 'O', 'o' };
80         uint32_t count = 0;
81         static char *task_position_string[] = {
82                 "checking extents",
83                 "checking free space cache",
84                 "checking fs roots",
85         };
86
87         task_period_start(priv->info, 1000 /* 1s */);
88
89         if (priv->tp == TASK_NOTHING)
90                 return NULL;
91
92         while (1) {
93                 printf("%s [%c]\r", task_position_string[priv->tp],
94                                 work_indicator[count % 4]);
95                 count++;
96                 fflush(stdout);
97                 task_period_wait(priv->info);
98         }
99         return NULL;
100 }
101
/*
 * Terminate the progress line: emit a newline on stdout and flush it.
 * @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
        (void)p;

        fputs("\n", stdout);
        fflush(stdout);

        return 0;
}
109
110 struct extent_backref {
111         struct list_head list;
112         unsigned int is_data:1;
113         unsigned int found_extent_tree:1;
114         unsigned int full_backref:1;
115         unsigned int found_ref:1;
116         unsigned int broken:1;
117 };
118
119 struct data_backref {
120         struct extent_backref node;
121         union {
122                 u64 parent;
123                 u64 root;
124         };
125         u64 owner;
126         u64 offset;
127         u64 disk_bytenr;
128         u64 bytes;
129         u64 ram_bytes;
130         u32 num_refs;
131         u32 found_ref;
132 };
133
134 /*
135  * Much like data_backref, just removed the undetermined members
136  * and change it to use list_head.
137  * During extent scan, it is stored in root->orphan_data_extent.
138  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
139  */
140 struct orphan_data_extent {
141         struct list_head list;
142         u64 root;
143         u64 objectid;
144         u64 offset;
145         u64 disk_bytenr;
146         u64 disk_len;
147 };
148
149 struct tree_backref {
150         struct extent_backref node;
151         union {
152                 u64 parent;
153                 u64 root;
154         };
155 };
156
157 struct extent_record {
158         struct list_head backrefs;
159         struct list_head dups;
160         struct list_head list;
161         struct cache_extent cache;
162         struct btrfs_disk_key parent_key;
163         u64 start;
164         u64 max_size;
165         u64 nr;
166         u64 refs;
167         u64 extent_item_refs;
168         u64 generation;
169         u64 parent_generation;
170         u64 info_objectid;
171         u32 num_duplicates;
172         u8 info_level;
173         int flag_block_full_backref;
174         unsigned int found_rec:1;
175         unsigned int content_checked:1;
176         unsigned int owner_ref_checked:1;
177         unsigned int is_root:1;
178         unsigned int metadata:1;
179         unsigned int bad_full_backref:1;
180         unsigned int crossing_stripes:1;
181         unsigned int wrong_chunk_type:1;
182 };
183
184 struct inode_backref {
185         struct list_head list;
186         unsigned int found_dir_item:1;
187         unsigned int found_dir_index:1;
188         unsigned int found_inode_ref:1;
189         unsigned int filetype:8;
190         int errors;
191         unsigned int ref_type;
192         u64 dir;
193         u64 index;
194         u16 namelen;
195         char name[0];
196 };
197
198 struct root_item_record {
199         struct list_head list;
200         u64 objectid;
201         u64 bytenr;
202         u64 last_snapshot;
203         u8 level;
204         u8 drop_level;
205         int level_size;
206         struct btrfs_key drop_key;
207 };
208
/*
 * Error bits stored in the "errors" field of a back reference and decoded
 * by print_ref_error().  The hexadecimal value of each bit is noted on the
 * right.
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)        /* 0x0001 */
#define REF_ERR_NO_DIR_INDEX            (1 << 1)        /* 0x0002 */
#define REF_ERR_NO_INODE_REF            (1 << 2)        /* 0x0004 */
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)        /* 0x0008 */
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)        /* 0x0010 */
#define REF_ERR_DUP_INODE_REF           (1 << 5)        /* 0x0020 */
#define REF_ERR_INDEX_UNMATCH           (1 << 6)        /* 0x0040 */
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)        /* 0x0080 */
#define REF_ERR_NAME_TOO_LONG           (1 << 8)        /* 0x0100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)        /* 0x0200 */
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)       /* 0x0400 */
#define REF_ERR_DUP_ROOT_REF            (1 << 11)       /* 0x0800 */
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)       /* 0x1000 */
222
223 struct file_extent_hole {
224         struct rb_node node;
225         u64 start;
226         u64 len;
227 };
228
229 /* Compatible function to allow reuse of old codes */
230 static u64 first_extent_gap(struct rb_root *holes)
231 {
232         struct file_extent_hole *hole;
233
234         if (RB_EMPTY_ROOT(holes))
235                 return (u64)-1;
236
237         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
238         return hole->start;
239 }
240
241 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
242 {
243         struct file_extent_hole *hole1;
244         struct file_extent_hole *hole2;
245
246         hole1 = rb_entry(node1, struct file_extent_hole, node);
247         hole2 = rb_entry(node2, struct file_extent_hole, node);
248
249         if (hole1->start > hole2->start)
250                 return -1;
251         if (hole1->start < hole2->start)
252                 return 1;
253         /* Now hole1->start == hole2->start */
254         if (hole1->len >= hole2->len)
255                 /*
256                  * Hole 1 will be merge center
257                  * Same hole will be merged later
258                  */
259                 return -1;
260         /* Hole 2 will be merge center */
261         return 1;
262 }
263
264 /*
265  * Add a hole to the record
266  *
267  * This will do hole merge for copy_file_extent_holes(),
268  * which will ensure there won't be continuous holes.
269  */
270 static int add_file_extent_hole(struct rb_root *holes,
271                                 u64 start, u64 len)
272 {
273         struct file_extent_hole *hole;
274         struct file_extent_hole *prev = NULL;
275         struct file_extent_hole *next = NULL;
276
277         hole = malloc(sizeof(*hole));
278         if (!hole)
279                 return -ENOMEM;
280         hole->start = start;
281         hole->len = len;
282         /* Since compare will not return 0, no -EEXIST will happen */
283         rb_insert(holes, &hole->node, compare_hole);
284
285         /* simple merge with previous hole */
286         if (rb_prev(&hole->node))
287                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
288                                 node);
289         if (prev && prev->start + prev->len >= hole->start) {
290                 hole->len = hole->start + hole->len - prev->start;
291                 hole->start = prev->start;
292                 rb_erase(&prev->node, holes);
293                 free(prev);
294                 prev = NULL;
295         }
296
297         /* iterate merge with next holes */
298         while (1) {
299                 if (!rb_next(&hole->node))
300                         break;
301                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
302                                         node);
303                 if (hole->start + hole->len >= next->start) {
304                         if (hole->start + hole->len <= next->start + next->len)
305                                 hole->len = next->start + next->len -
306                                             hole->start;
307                         rb_erase(&next->node, holes);
308                         free(next);
309                         next = NULL;
310                 } else
311                         break;
312         }
313         return 0;
314 }
315
316 static int compare_hole_range(struct rb_node *node, void *data)
317 {
318         struct file_extent_hole *hole;
319         u64 start;
320
321         hole = (struct file_extent_hole *)data;
322         start = hole->start;
323
324         hole = rb_entry(node, struct file_extent_hole, node);
325         if (start < hole->start)
326                 return -1;
327         if (start >= hole->start && start < hole->start + hole->len)
328                 return 0;
329         return 1;
330 }
331
332 /*
333  * Delete a hole in the record
334  *
335  * This will do the hole split and is much restrict than add.
336  */
337 static int del_file_extent_hole(struct rb_root *holes,
338                                 u64 start, u64 len)
339 {
340         struct file_extent_hole *hole;
341         struct file_extent_hole tmp;
342         u64 prev_start = 0;
343         u64 prev_len = 0;
344         u64 next_start = 0;
345         u64 next_len = 0;
346         struct rb_node *node;
347         int have_prev = 0;
348         int have_next = 0;
349         int ret = 0;
350
351         tmp.start = start;
352         tmp.len = len;
353         node = rb_search(holes, &tmp, compare_hole_range, NULL);
354         if (!node)
355                 return -EEXIST;
356         hole = rb_entry(node, struct file_extent_hole, node);
357         if (start + len > hole->start + hole->len)
358                 return -EEXIST;
359
360         /*
361          * Now there will be no overflap, delete the hole and re-add the
362          * split(s) if they exists.
363          */
364         if (start > hole->start) {
365                 prev_start = hole->start;
366                 prev_len = start - hole->start;
367                 have_prev = 1;
368         }
369         if (hole->start + hole->len > start + len) {
370                 next_start = start + len;
371                 next_len = hole->start + hole->len - start - len;
372                 have_next = 1;
373         }
374         rb_erase(node, holes);
375         free(hole);
376         if (have_prev) {
377                 ret = add_file_extent_hole(holes, prev_start, prev_len);
378                 if (ret < 0)
379                         return ret;
380         }
381         if (have_next) {
382                 ret = add_file_extent_hole(holes, next_start, next_len);
383                 if (ret < 0)
384                         return ret;
385         }
386         return 0;
387 }
388
389 static int copy_file_extent_holes(struct rb_root *dst,
390                                   struct rb_root *src)
391 {
392         struct file_extent_hole *hole;
393         struct rb_node *node;
394         int ret = 0;
395
396         node = rb_first(src);
397         while (node) {
398                 hole = rb_entry(node, struct file_extent_hole, node);
399                 ret = add_file_extent_hole(dst, hole->start, hole->len);
400                 if (ret)
401                         break;
402                 node = rb_next(node);
403         }
404         return ret;
405 }
406
407 static void free_file_extent_holes(struct rb_root *holes)
408 {
409         struct rb_node *node;
410         struct file_extent_hole *hole;
411
412         node = rb_first(holes);
413         while (node) {
414                 hole = rb_entry(node, struct file_extent_hole, node);
415                 rb_erase(node, holes);
416                 free(hole);
417                 node = rb_first(holes);
418         }
419 }
420
421 struct inode_record {
422         struct list_head backrefs;
423         unsigned int checked:1;
424         unsigned int merging:1;
425         unsigned int found_inode_item:1;
426         unsigned int found_dir_item:1;
427         unsigned int found_file_extent:1;
428         unsigned int found_csum_item:1;
429         unsigned int some_csum_missing:1;
430         unsigned int nodatasum:1;
431         int errors;
432
433         u64 ino;
434         u32 nlink;
435         u32 imode;
436         u64 isize;
437         u64 nbytes;
438
439         u32 found_link;
440         u64 found_size;
441         u64 extent_start;
442         u64 extent_end;
443         struct rb_root holes;
444         struct list_head orphan_extents;
445
446         u32 refs;
447 };
448
/*
 * Error bits stored in inode_record.errors and decoded by
 * print_inode_error().  The hexadecimal value of each bit is noted on the
 * right.
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)        /* 0x0001 */
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)        /* 0x0002 */
#define I_ERR_DUP_INODE_ITEM            (1 << 2)        /* 0x0004 */
#define I_ERR_DUP_DIR_INDEX             (1 << 3)        /* 0x0008 */
#define I_ERR_ODD_DIR_ITEM              (1 << 4)        /* 0x0010 */
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)        /* 0x0020 */
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)        /* 0x0040 */
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)        /* 0x0080 */
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8)        /* 0x0100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)        /* 0x0200 */
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10)       /* 0x0400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)       /* 0x0800 */
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)       /* 0x1000 */
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)       /* 0x2000 */
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)       /* 0x4000 */
464
465 struct root_backref {
466         struct list_head list;
467         unsigned int found_dir_item:1;
468         unsigned int found_dir_index:1;
469         unsigned int found_back_ref:1;
470         unsigned int found_forward_ref:1;
471         unsigned int reachable:1;
472         int errors;
473         u64 ref_root;
474         u64 dir;
475         u64 index;
476         u16 namelen;
477         char name[0];
478 };
479
480 struct root_record {
481         struct list_head backrefs;
482         struct cache_extent cache;
483         unsigned int found_root_item:1;
484         u64 objectid;
485         u32 found_ref;
486 };
487
488 struct ptr_node {
489         struct cache_extent cache;
490         void *data;
491 };
492
493 struct shared_node {
494         struct cache_extent cache;
495         struct cache_tree root_cache;
496         struct cache_tree inode_cache;
497         struct inode_record *current;
498         u32 refs;
499 };
500
501 struct block_info {
502         u64 start;
503         u32 size;
504 };
505
506 struct walk_control {
507         struct cache_tree shared;
508         struct shared_node *nodes[BTRFS_MAX_LEVEL];
509         int active_node;
510         int root_level;
511 };
512
513 struct bad_item {
514         struct btrfs_key key;
515         u64 root_id;
516         struct list_head list;
517 };
518
/* Forward declaration; being static, the definition lives in this file. */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
520
521 static void record_root_in_trans(struct btrfs_trans_handle *trans,
522                                  struct btrfs_root *root)
523 {
524         if (root->last_trans != trans->transid) {
525                 root->track_dirty = 1;
526                 root->last_trans = trans->transid;
527                 root->commit_root = root->node;
528                 extent_buffer_get(root->node);
529         }
530 }
531
532 static u8 imode_to_type(u32 imode)
533 {
534 #define S_SHIFT 12
535         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
536                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
537                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
538                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
539                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
540                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
541                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
542                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
543         };
544
545         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
546 #undef S_SHIFT
547 }
548
549 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
550 {
551         struct device_record *rec1;
552         struct device_record *rec2;
553
554         rec1 = rb_entry(node1, struct device_record, node);
555         rec2 = rb_entry(node2, struct device_record, node);
556         if (rec1->devid > rec2->devid)
557                 return -1;
558         else if (rec1->devid < rec2->devid)
559                 return 1;
560         else
561                 return 0;
562 }
563
564 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
565 {
566         struct inode_record *rec;
567         struct inode_backref *backref;
568         struct inode_backref *orig;
569         struct inode_backref *tmp;
570         struct orphan_data_extent *src_orphan;
571         struct orphan_data_extent *dst_orphan;
572         size_t size;
573         int ret;
574
575         rec = malloc(sizeof(*rec));
576         if (!rec)
577                 return ERR_PTR(-ENOMEM);
578         memcpy(rec, orig_rec, sizeof(*rec));
579         rec->refs = 1;
580         INIT_LIST_HEAD(&rec->backrefs);
581         INIT_LIST_HEAD(&rec->orphan_extents);
582         rec->holes = RB_ROOT;
583
584         list_for_each_entry(orig, &orig_rec->backrefs, list) {
585                 size = sizeof(*orig) + orig->namelen + 1;
586                 backref = malloc(size);
587                 if (!backref) {
588                         ret = -ENOMEM;
589                         goto cleanup;
590                 }
591                 memcpy(backref, orig, size);
592                 list_add_tail(&backref->list, &rec->backrefs);
593         }
594         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
595                 dst_orphan = malloc(sizeof(*dst_orphan));
596                 if (!dst_orphan) {
597                         ret = -ENOMEM;
598                         goto cleanup;
599                 }
600                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
601                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
602         }
603         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
604         BUG_ON(ret < 0);
605
606         return rec;
607
608 cleanup:
609         if (!list_empty(&rec->backrefs))
610                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
611                         list_del(&orig->list);
612                         free(orig);
613                 }
614
615         if (!list_empty(&rec->orphan_extents))
616                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
617                         list_del(&orig->list);
618                         free(orig);
619                 }
620
621         free(rec);
622
623         return ERR_PTR(ret);
624 }
625
626 static void print_orphan_data_extents(struct list_head *orphan_extents,
627                                       u64 objectid)
628 {
629         struct orphan_data_extent *orphan;
630
631         if (list_empty(orphan_extents))
632                 return;
633         printf("The following data extent is lost in tree %llu:\n",
634                objectid);
635         list_for_each_entry(orphan, orphan_extents, list) {
636                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
637                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
638                        orphan->disk_len);
639         }
640 }
641
642 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
643 {
644         u64 root_objectid = root->root_key.objectid;
645         int errors = rec->errors;
646
647         if (!errors)
648                 return;
649         /* reloc root errors, we print its corresponding fs root objectid*/
650         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
651                 root_objectid = root->root_key.offset;
652                 fprintf(stderr, "reloc");
653         }
654         fprintf(stderr, "root %llu inode %llu errors %x",
655                 (unsigned long long) root_objectid,
656                 (unsigned long long) rec->ino, rec->errors);
657
658         if (errors & I_ERR_NO_INODE_ITEM)
659                 fprintf(stderr, ", no inode item");
660         if (errors & I_ERR_NO_ORPHAN_ITEM)
661                 fprintf(stderr, ", no orphan item");
662         if (errors & I_ERR_DUP_INODE_ITEM)
663                 fprintf(stderr, ", dup inode item");
664         if (errors & I_ERR_DUP_DIR_INDEX)
665                 fprintf(stderr, ", dup dir index");
666         if (errors & I_ERR_ODD_DIR_ITEM)
667                 fprintf(stderr, ", odd dir item");
668         if (errors & I_ERR_ODD_FILE_EXTENT)
669                 fprintf(stderr, ", odd file extent");
670         if (errors & I_ERR_BAD_FILE_EXTENT)
671                 fprintf(stderr, ", bad file extent");
672         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
673                 fprintf(stderr, ", file extent overlap");
674         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
675                 fprintf(stderr, ", file extent discount");
676         if (errors & I_ERR_DIR_ISIZE_WRONG)
677                 fprintf(stderr, ", dir isize wrong");
678         if (errors & I_ERR_FILE_NBYTES_WRONG)
679                 fprintf(stderr, ", nbytes wrong");
680         if (errors & I_ERR_ODD_CSUM_ITEM)
681                 fprintf(stderr, ", odd csum item");
682         if (errors & I_ERR_SOME_CSUM_MISSING)
683                 fprintf(stderr, ", some csum missing");
684         if (errors & I_ERR_LINK_COUNT_WRONG)
685                 fprintf(stderr, ", link count wrong");
686         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
687                 fprintf(stderr, ", orphan file extent");
688         fprintf(stderr, "\n");
689         /* Print the orphan extents if needed */
690         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
691                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
692
693         /* Print the holes if needed */
694         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
695                 struct file_extent_hole *hole;
696                 struct rb_node *node;
697                 int found = 0;
698
699                 node = rb_first(&rec->holes);
700                 fprintf(stderr, "Found file extent holes:\n");
701                 while (node) {
702                         found = 1;
703                         hole = rb_entry(node, struct file_extent_hole, node);
704                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
705                                 hole->start, hole->len);
706                         node = rb_next(node);
707                 }
708                 if (!found)
709                         fprintf(stderr, "\tstart: 0, len: %llu\n",
710                                 round_up(rec->isize, root->sectorsize));
711         }
712 }
713
714 static void print_ref_error(int errors)
715 {
716         if (errors & REF_ERR_NO_DIR_ITEM)
717                 fprintf(stderr, ", no dir item");
718         if (errors & REF_ERR_NO_DIR_INDEX)
719                 fprintf(stderr, ", no dir index");
720         if (errors & REF_ERR_NO_INODE_REF)
721                 fprintf(stderr, ", no inode ref");
722         if (errors & REF_ERR_DUP_DIR_ITEM)
723                 fprintf(stderr, ", dup dir item");
724         if (errors & REF_ERR_DUP_DIR_INDEX)
725                 fprintf(stderr, ", dup dir index");
726         if (errors & REF_ERR_DUP_INODE_REF)
727                 fprintf(stderr, ", dup inode ref");
728         if (errors & REF_ERR_INDEX_UNMATCH)
729                 fprintf(stderr, ", index unmatch");
730         if (errors & REF_ERR_FILETYPE_UNMATCH)
731                 fprintf(stderr, ", filetype unmatch");
732         if (errors & REF_ERR_NAME_TOO_LONG)
733                 fprintf(stderr, ", name too long");
734         if (errors & REF_ERR_NO_ROOT_REF)
735                 fprintf(stderr, ", no root ref");
736         if (errors & REF_ERR_NO_ROOT_BACKREF)
737                 fprintf(stderr, ", no root backref");
738         if (errors & REF_ERR_DUP_ROOT_REF)
739                 fprintf(stderr, ", dup root ref");
740         if (errors & REF_ERR_DUP_ROOT_BACKREF)
741                 fprintf(stderr, ", dup root backref");
742         fprintf(stderr, "\n");
743 }
744
745 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
746                                           u64 ino, int mod)
747 {
748         struct ptr_node *node;
749         struct cache_extent *cache;
750         struct inode_record *rec = NULL;
751         int ret;
752
753         cache = lookup_cache_extent(inode_cache, ino, 1);
754         if (cache) {
755                 node = container_of(cache, struct ptr_node, cache);
756                 rec = node->data;
757                 if (mod && rec->refs > 1) {
758                         node->data = clone_inode_rec(rec);
759                         if (IS_ERR(node->data))
760                                 return node->data;
761                         rec->refs--;
762                         rec = node->data;
763                 }
764         } else if (mod) {
765                 rec = calloc(1, sizeof(*rec));
766                 if (!rec)
767                         return ERR_PTR(-ENOMEM);
768                 rec->ino = ino;
769                 rec->extent_start = (u64)-1;
770                 rec->refs = 1;
771                 INIT_LIST_HEAD(&rec->backrefs);
772                 INIT_LIST_HEAD(&rec->orphan_extents);
773                 rec->holes = RB_ROOT;
774
775                 node = malloc(sizeof(*node));
776                 if (!node) {
777                         free(rec);
778                         return ERR_PTR(-ENOMEM);
779                 }
780                 node->cache.start = ino;
781                 node->cache.size = 1;
782                 node->data = rec;
783
784                 if (ino == BTRFS_FREE_INO_OBJECTID)
785                         rec->found_link = 1;
786
787                 ret = insert_cache_extent(inode_cache, &node->cache);
788                 if (ret)
789                         return ERR_PTR(-EEXIST);
790         }
791         return rec;
792 }
793
794 static void free_orphan_data_extents(struct list_head *orphan_extents)
795 {
796         struct orphan_data_extent *orphan;
797
798         while (!list_empty(orphan_extents)) {
799                 orphan = list_entry(orphan_extents->next,
800                                     struct orphan_data_extent, list);
801                 list_del(&orphan->list);
802                 free(orphan);
803         }
804 }
805
806 static void free_inode_rec(struct inode_record *rec)
807 {
808         struct inode_backref *backref;
809
810         if (--rec->refs > 0)
811                 return;
812
813         while (!list_empty(&rec->backrefs)) {
814                 backref = list_entry(rec->backrefs.next,
815                                      struct inode_backref, list);
816                 list_del(&backref->list);
817                 free(backref);
818         }
819         free_orphan_data_extents(&rec->orphan_extents);
820         free_file_extent_holes(&rec->holes);
821         free(rec);
822 }
823
824 static int can_free_inode_rec(struct inode_record *rec)
825 {
826         if (!rec->errors && rec->checked && rec->found_inode_item &&
827             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
828                 return 1;
829         return 0;
830 }
831
832 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
833                                  struct inode_record *rec)
834 {
835         struct cache_extent *cache;
836         struct inode_backref *tmp, *backref;
837         struct ptr_node *node;
838         unsigned char filetype;
839
840         if (!rec->found_inode_item)
841                 return;
842
843         filetype = imode_to_type(rec->imode);
844         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
845                 if (backref->found_dir_item && backref->found_dir_index) {
846                         if (backref->filetype != filetype)
847                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
848                         if (!backref->errors && backref->found_inode_ref &&
849                             rec->nlink == rec->found_link) {
850                                 list_del(&backref->list);
851                                 free(backref);
852                         }
853                 }
854         }
855
856         if (!rec->checked || rec->merging)
857                 return;
858
859         if (S_ISDIR(rec->imode)) {
860                 if (rec->found_size != rec->isize)
861                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
862                 if (rec->found_file_extent)
863                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
864         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
865                 if (rec->found_dir_item)
866                         rec->errors |= I_ERR_ODD_DIR_ITEM;
867                 if (rec->found_size != rec->nbytes)
868                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
869                 if (rec->nlink > 0 && !no_holes &&
870                     (rec->extent_end < rec->isize ||
871                      first_extent_gap(&rec->holes) < rec->isize))
872                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
873         }
874
875         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
876                 if (rec->found_csum_item && rec->nodatasum)
877                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
878                 if (rec->some_csum_missing && !rec->nodatasum)
879                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
880         }
881
882         BUG_ON(rec->refs != 1);
883         if (can_free_inode_rec(rec)) {
884                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
885                 node = container_of(cache, struct ptr_node, cache);
886                 BUG_ON(node->data != rec);
887                 remove_cache_extent(inode_cache, &node->cache);
888                 free(node);
889                 free_inode_rec(rec);
890         }
891 }
892
893 static int check_orphan_item(struct btrfs_root *root, u64 ino)
894 {
895         struct btrfs_path path;
896         struct btrfs_key key;
897         int ret;
898
899         key.objectid = BTRFS_ORPHAN_OBJECTID;
900         key.type = BTRFS_ORPHAN_ITEM_KEY;
901         key.offset = ino;
902
903         btrfs_init_path(&path);
904         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
905         btrfs_release_path(&path);
906         if (ret > 0)
907                 ret = -ENOENT;
908         return ret;
909 }
910
911 static int process_inode_item(struct extent_buffer *eb,
912                               int slot, struct btrfs_key *key,
913                               struct shared_node *active_node)
914 {
915         struct inode_record *rec;
916         struct btrfs_inode_item *item;
917
918         rec = active_node->current;
919         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
920         if (rec->found_inode_item) {
921                 rec->errors |= I_ERR_DUP_INODE_ITEM;
922                 return 1;
923         }
924         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
925         rec->nlink = btrfs_inode_nlink(eb, item);
926         rec->isize = btrfs_inode_size(eb, item);
927         rec->nbytes = btrfs_inode_nbytes(eb, item);
928         rec->imode = btrfs_inode_mode(eb, item);
929         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
930                 rec->nodatasum = 1;
931         rec->found_inode_item = 1;
932         if (rec->nlink == 0)
933                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
934         maybe_free_inode_rec(&active_node->inode_cache, rec);
935         return 0;
936 }
937
938 static struct inode_backref *get_inode_backref(struct inode_record *rec,
939                                                 const char *name,
940                                                 int namelen, u64 dir)
941 {
942         struct inode_backref *backref;
943
944         list_for_each_entry(backref, &rec->backrefs, list) {
945                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
946                         break;
947                 if (backref->dir != dir || backref->namelen != namelen)
948                         continue;
949                 if (memcmp(name, backref->name, namelen))
950                         continue;
951                 return backref;
952         }
953
954         backref = malloc(sizeof(*backref) + namelen + 1);
955         if (!backref)
956                 return NULL;
957         memset(backref, 0, sizeof(*backref));
958         backref->dir = dir;
959         backref->namelen = namelen;
960         memcpy(backref->name, name, namelen);
961         backref->name[namelen] = '\0';
962         list_add_tail(&backref->list, &rec->backrefs);
963         return backref;
964 }
965
966 static int add_inode_backref(struct cache_tree *inode_cache,
967                              u64 ino, u64 dir, u64 index,
968                              const char *name, int namelen,
969                              int filetype, int itemtype, int errors)
970 {
971         struct inode_record *rec;
972         struct inode_backref *backref;
973
974         rec = get_inode_rec(inode_cache, ino, 1);
975         BUG_ON(IS_ERR(rec));
976         backref = get_inode_backref(rec, name, namelen, dir);
977         BUG_ON(!backref);
978         if (errors)
979                 backref->errors |= errors;
980         if (itemtype == BTRFS_DIR_INDEX_KEY) {
981                 if (backref->found_dir_index)
982                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
983                 if (backref->found_inode_ref && backref->index != index)
984                         backref->errors |= REF_ERR_INDEX_UNMATCH;
985                 if (backref->found_dir_item && backref->filetype != filetype)
986                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
987
988                 backref->index = index;
989                 backref->filetype = filetype;
990                 backref->found_dir_index = 1;
991         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
992                 rec->found_link++;
993                 if (backref->found_dir_item)
994                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
995                 if (backref->found_dir_index && backref->filetype != filetype)
996                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
997
998                 backref->filetype = filetype;
999                 backref->found_dir_item = 1;
1000         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1001                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1002                 if (backref->found_inode_ref)
1003                         backref->errors |= REF_ERR_DUP_INODE_REF;
1004                 if (backref->found_dir_index && backref->index != index)
1005                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1006                 else
1007                         backref->index = index;
1008
1009                 backref->ref_type = itemtype;
1010                 backref->found_inode_ref = 1;
1011         } else {
1012                 BUG_ON(1);
1013         }
1014
1015         maybe_free_inode_rec(inode_cache, rec);
1016         return 0;
1017 }
1018
1019 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1020                             struct cache_tree *dst_cache)
1021 {
1022         struct inode_backref *backref;
1023         u32 dir_count = 0;
1024         int ret = 0;
1025
1026         dst->merging = 1;
1027         list_for_each_entry(backref, &src->backrefs, list) {
1028                 if (backref->found_dir_index) {
1029                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1030                                         backref->index, backref->name,
1031                                         backref->namelen, backref->filetype,
1032                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1033                 }
1034                 if (backref->found_dir_item) {
1035                         dir_count++;
1036                         add_inode_backref(dst_cache, dst->ino,
1037                                         backref->dir, 0, backref->name,
1038                                         backref->namelen, backref->filetype,
1039                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1040                 }
1041                 if (backref->found_inode_ref) {
1042                         add_inode_backref(dst_cache, dst->ino,
1043                                         backref->dir, backref->index,
1044                                         backref->name, backref->namelen, 0,
1045                                         backref->ref_type, backref->errors);
1046                 }
1047         }
1048
1049         if (src->found_dir_item)
1050                 dst->found_dir_item = 1;
1051         if (src->found_file_extent)
1052                 dst->found_file_extent = 1;
1053         if (src->found_csum_item)
1054                 dst->found_csum_item = 1;
1055         if (src->some_csum_missing)
1056                 dst->some_csum_missing = 1;
1057         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1058                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1059                 if (ret < 0)
1060                         return ret;
1061         }
1062
1063         BUG_ON(src->found_link < dir_count);
1064         dst->found_link += src->found_link - dir_count;
1065         dst->found_size += src->found_size;
1066         if (src->extent_start != (u64)-1) {
1067                 if (dst->extent_start == (u64)-1) {
1068                         dst->extent_start = src->extent_start;
1069                         dst->extent_end = src->extent_end;
1070                 } else {
1071                         if (dst->extent_end > src->extent_start)
1072                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1073                         else if (dst->extent_end < src->extent_start) {
1074                                 ret = add_file_extent_hole(&dst->holes,
1075                                         dst->extent_end,
1076                                         src->extent_start - dst->extent_end);
1077                         }
1078                         if (dst->extent_end < src->extent_end)
1079                                 dst->extent_end = src->extent_end;
1080                 }
1081         }
1082
1083         dst->errors |= src->errors;
1084         if (src->found_inode_item) {
1085                 if (!dst->found_inode_item) {
1086                         dst->nlink = src->nlink;
1087                         dst->isize = src->isize;
1088                         dst->nbytes = src->nbytes;
1089                         dst->imode = src->imode;
1090                         dst->nodatasum = src->nodatasum;
1091                         dst->found_inode_item = 1;
1092                 } else {
1093                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1094                 }
1095         }
1096         dst->merging = 0;
1097
1098         return 0;
1099 }
1100
1101 static int splice_shared_node(struct shared_node *src_node,
1102                               struct shared_node *dst_node)
1103 {
1104         struct cache_extent *cache;
1105         struct ptr_node *node, *ins;
1106         struct cache_tree *src, *dst;
1107         struct inode_record *rec, *conflict;
1108         u64 current_ino = 0;
1109         int splice = 0;
1110         int ret;
1111
1112         if (--src_node->refs == 0)
1113                 splice = 1;
1114         if (src_node->current)
1115                 current_ino = src_node->current->ino;
1116
1117         src = &src_node->root_cache;
1118         dst = &dst_node->root_cache;
1119 again:
1120         cache = search_cache_extent(src, 0);
1121         while (cache) {
1122                 node = container_of(cache, struct ptr_node, cache);
1123                 rec = node->data;
1124                 cache = next_cache_extent(cache);
1125
1126                 if (splice) {
1127                         remove_cache_extent(src, &node->cache);
1128                         ins = node;
1129                 } else {
1130                         ins = malloc(sizeof(*ins));
1131                         ins->cache.start = node->cache.start;
1132                         ins->cache.size = node->cache.size;
1133                         ins->data = rec;
1134                         rec->refs++;
1135                 }
1136                 ret = insert_cache_extent(dst, &ins->cache);
1137                 if (ret == -EEXIST) {
1138                         conflict = get_inode_rec(dst, rec->ino, 1);
1139                         BUG_ON(IS_ERR(conflict));
1140                         merge_inode_recs(rec, conflict, dst);
1141                         if (rec->checked) {
1142                                 conflict->checked = 1;
1143                                 if (dst_node->current == conflict)
1144                                         dst_node->current = NULL;
1145                         }
1146                         maybe_free_inode_rec(dst, conflict);
1147                         free_inode_rec(rec);
1148                         free(ins);
1149                 } else {
1150                         BUG_ON(ret);
1151                 }
1152         }
1153
1154         if (src == &src_node->root_cache) {
1155                 src = &src_node->inode_cache;
1156                 dst = &dst_node->inode_cache;
1157                 goto again;
1158         }
1159
1160         if (current_ino > 0 && (!dst_node->current ||
1161             current_ino > dst_node->current->ino)) {
1162                 if (dst_node->current) {
1163                         dst_node->current->checked = 1;
1164                         maybe_free_inode_rec(dst, dst_node->current);
1165                 }
1166                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1167                 BUG_ON(IS_ERR(dst_node->current));
1168         }
1169         return 0;
1170 }
1171
1172 static void free_inode_ptr(struct cache_extent *cache)
1173 {
1174         struct ptr_node *node;
1175         struct inode_record *rec;
1176
1177         node = container_of(cache, struct ptr_node, cache);
1178         rec = node->data;
1179         free_inode_rec(rec);
1180         free(node);
1181 }
1182
1183 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1184
1185 static struct shared_node *find_shared_node(struct cache_tree *shared,
1186                                             u64 bytenr)
1187 {
1188         struct cache_extent *cache;
1189         struct shared_node *node;
1190
1191         cache = lookup_cache_extent(shared, bytenr, 1);
1192         if (cache) {
1193                 node = container_of(cache, struct shared_node, cache);
1194                 return node;
1195         }
1196         return NULL;
1197 }
1198
1199 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1200 {
1201         int ret;
1202         struct shared_node *node;
1203
1204         node = calloc(1, sizeof(*node));
1205         if (!node)
1206                 return -ENOMEM;
1207         node->cache.start = bytenr;
1208         node->cache.size = 1;
1209         cache_tree_init(&node->root_cache);
1210         cache_tree_init(&node->inode_cache);
1211         node->refs = refs;
1212
1213         ret = insert_cache_extent(shared, &node->cache);
1214
1215         return ret;
1216 }
1217
1218 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1219                              struct walk_control *wc, int level)
1220 {
1221         struct shared_node *node;
1222         struct shared_node *dest;
1223         int ret;
1224
1225         if (level == wc->active_node)
1226                 return 0;
1227
1228         BUG_ON(wc->active_node <= level);
1229         node = find_shared_node(&wc->shared, bytenr);
1230         if (!node) {
1231                 ret = add_shared_node(&wc->shared, bytenr, refs);
1232                 BUG_ON(ret);
1233                 node = find_shared_node(&wc->shared, bytenr);
1234                 wc->nodes[level] = node;
1235                 wc->active_node = level;
1236                 return 0;
1237         }
1238
1239         if (wc->root_level == wc->active_node &&
1240             btrfs_root_refs(&root->root_item) == 0) {
1241                 if (--node->refs == 0) {
1242                         free_inode_recs_tree(&node->root_cache);
1243                         free_inode_recs_tree(&node->inode_cache);
1244                         remove_cache_extent(&wc->shared, &node->cache);
1245                         free(node);
1246                 }
1247                 return 1;
1248         }
1249
1250         dest = wc->nodes[wc->active_node];
1251         splice_shared_node(node, dest);
1252         if (node->refs == 0) {
1253                 remove_cache_extent(&wc->shared, &node->cache);
1254                 free(node);
1255         }
1256         return 1;
1257 }
1258
1259 static int leave_shared_node(struct btrfs_root *root,
1260                              struct walk_control *wc, int level)
1261 {
1262         struct shared_node *node;
1263         struct shared_node *dest;
1264         int i;
1265
1266         if (level == wc->root_level)
1267                 return 0;
1268
1269         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1270                 if (wc->nodes[i])
1271                         break;
1272         }
1273         BUG_ON(i >= BTRFS_MAX_LEVEL);
1274
1275         node = wc->nodes[wc->active_node];
1276         wc->nodes[wc->active_node] = NULL;
1277         wc->active_node = i;
1278
1279         dest = wc->nodes[wc->active_node];
1280         if (wc->active_node < wc->root_level ||
1281             btrfs_root_refs(&root->root_item) > 0) {
1282                 BUG_ON(node->refs <= 1);
1283                 splice_shared_node(node, dest);
1284         } else {
1285                 BUG_ON(node->refs < 2);
1286                 node->refs--;
1287         }
1288         return 0;
1289 }
1290
1291 /*
1292  * Returns:
1293  * < 0 - on error
1294  * 1   - if the root with id child_root_id is a child of root parent_root_id
1295  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1296  *       has other root(s) as parent(s)
1297  * 2   - if the root child_root_id doesn't have any parent roots
1298  */
1299 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1300                          u64 child_root_id)
1301 {
1302         struct btrfs_path path;
1303         struct btrfs_key key;
1304         struct extent_buffer *leaf;
1305         int has_parent = 0;
1306         int ret;
1307
1308         btrfs_init_path(&path);
1309
1310         key.objectid = parent_root_id;
1311         key.type = BTRFS_ROOT_REF_KEY;
1312         key.offset = child_root_id;
1313         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1314                                 0, 0);
1315         if (ret < 0)
1316                 return ret;
1317         btrfs_release_path(&path);
1318         if (!ret)
1319                 return 1;
1320
1321         key.objectid = child_root_id;
1322         key.type = BTRFS_ROOT_BACKREF_KEY;
1323         key.offset = 0;
1324         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1325                                 0, 0);
1326         if (ret < 0)
1327                 goto out;
1328
1329         while (1) {
1330                 leaf = path.nodes[0];
1331                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1332                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1333                         if (ret)
1334                                 break;
1335                         leaf = path.nodes[0];
1336                 }
1337
1338                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1339                 if (key.objectid != child_root_id ||
1340                     key.type != BTRFS_ROOT_BACKREF_KEY)
1341                         break;
1342
1343                 has_parent = 1;
1344
1345                 if (key.offset == parent_root_id) {
1346                         btrfs_release_path(&path);
1347                         return 1;
1348                 }
1349
1350                 path.slots[0]++;
1351         }
1352 out:
1353         btrfs_release_path(&path);
1354         if (ret < 0)
1355                 return ret;
1356         return has_parent ? 0 : 2;
1357 }
1358
1359 static int process_dir_item(struct btrfs_root *root,
1360                             struct extent_buffer *eb,
1361                             int slot, struct btrfs_key *key,
1362                             struct shared_node *active_node)
1363 {
1364         u32 total;
1365         u32 cur = 0;
1366         u32 len;
1367         u32 name_len;
1368         u32 data_len;
1369         int error;
1370         int nritems = 0;
1371         int filetype;
1372         struct btrfs_dir_item *di;
1373         struct inode_record *rec;
1374         struct cache_tree *root_cache;
1375         struct cache_tree *inode_cache;
1376         struct btrfs_key location;
1377         char namebuf[BTRFS_NAME_LEN];
1378
1379         root_cache = &active_node->root_cache;
1380         inode_cache = &active_node->inode_cache;
1381         rec = active_node->current;
1382         rec->found_dir_item = 1;
1383
1384         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1385         total = btrfs_item_size_nr(eb, slot);
1386         while (cur < total) {
1387                 nritems++;
1388                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1389                 name_len = btrfs_dir_name_len(eb, di);
1390                 data_len = btrfs_dir_data_len(eb, di);
1391                 filetype = btrfs_dir_type(eb, di);
1392
1393                 rec->found_size += name_len;
1394                 if (name_len <= BTRFS_NAME_LEN) {
1395                         len = name_len;
1396                         error = 0;
1397                 } else {
1398                         len = BTRFS_NAME_LEN;
1399                         error = REF_ERR_NAME_TOO_LONG;
1400                 }
1401                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1402
1403                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1404                         add_inode_backref(inode_cache, location.objectid,
1405                                           key->objectid, key->offset, namebuf,
1406                                           len, filetype, key->type, error);
1407                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1408                         add_inode_backref(root_cache, location.objectid,
1409                                           key->objectid, key->offset,
1410                                           namebuf, len, filetype,
1411                                           key->type, error);
1412                 } else {
1413                         fprintf(stderr, "invalid location in dir item %u\n",
1414                                 location.type);
1415                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1416                                           key->objectid, key->offset, namebuf,
1417                                           len, filetype, key->type, error);
1418                 }
1419
1420                 len = sizeof(*di) + name_len + data_len;
1421                 di = (struct btrfs_dir_item *)((char *)di + len);
1422                 cur += len;
1423         }
1424         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1425                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1426
1427         return 0;
1428 }
1429
1430 static int process_inode_ref(struct extent_buffer *eb,
1431                              int slot, struct btrfs_key *key,
1432                              struct shared_node *active_node)
1433 {
1434         u32 total;
1435         u32 cur = 0;
1436         u32 len;
1437         u32 name_len;
1438         u64 index;
1439         int error;
1440         struct cache_tree *inode_cache;
1441         struct btrfs_inode_ref *ref;
1442         char namebuf[BTRFS_NAME_LEN];
1443
1444         inode_cache = &active_node->inode_cache;
1445
1446         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1447         total = btrfs_item_size_nr(eb, slot);
1448         while (cur < total) {
1449                 name_len = btrfs_inode_ref_name_len(eb, ref);
1450                 index = btrfs_inode_ref_index(eb, ref);
1451                 if (name_len <= BTRFS_NAME_LEN) {
1452                         len = name_len;
1453                         error = 0;
1454                 } else {
1455                         len = BTRFS_NAME_LEN;
1456                         error = REF_ERR_NAME_TOO_LONG;
1457                 }
1458                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1459                 add_inode_backref(inode_cache, key->objectid, key->offset,
1460                                   index, namebuf, len, 0, key->type, error);
1461
1462                 len = sizeof(*ref) + name_len;
1463                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1464                 cur += len;
1465         }
1466         return 0;
1467 }
1468
1469 static int process_inode_extref(struct extent_buffer *eb,
1470                                 int slot, struct btrfs_key *key,
1471                                 struct shared_node *active_node)
1472 {
1473         u32 total;
1474         u32 cur = 0;
1475         u32 len;
1476         u32 name_len;
1477         u64 index;
1478         u64 parent;
1479         int error;
1480         struct cache_tree *inode_cache;
1481         struct btrfs_inode_extref *extref;
1482         char namebuf[BTRFS_NAME_LEN];
1483
1484         inode_cache = &active_node->inode_cache;
1485
1486         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1487         total = btrfs_item_size_nr(eb, slot);
1488         while (cur < total) {
1489                 name_len = btrfs_inode_extref_name_len(eb, extref);
1490                 index = btrfs_inode_extref_index(eb, extref);
1491                 parent = btrfs_inode_extref_parent(eb, extref);
1492                 if (name_len <= BTRFS_NAME_LEN) {
1493                         len = name_len;
1494                         error = 0;
1495                 } else {
1496                         len = BTRFS_NAME_LEN;
1497                         error = REF_ERR_NAME_TOO_LONG;
1498                 }
1499                 read_extent_buffer(eb, namebuf,
1500                                    (unsigned long)(extref + 1), len);
1501                 add_inode_backref(inode_cache, key->objectid, parent,
1502                                   index, namebuf, len, 0, key->type, error);
1503
1504                 len = sizeof(*extref) + name_len;
1505                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1506                 cur += len;
1507         }
1508         return 0;
1509
1510 }
1511
1512 static int count_csum_range(struct btrfs_root *root, u64 start,
1513                             u64 len, u64 *found)
1514 {
1515         struct btrfs_key key;
1516         struct btrfs_path path;
1517         struct extent_buffer *leaf;
1518         int ret;
1519         size_t size;
1520         *found = 0;
1521         u64 csum_end;
1522         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1523
1524         btrfs_init_path(&path);
1525
1526         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1527         key.offset = start;
1528         key.type = BTRFS_EXTENT_CSUM_KEY;
1529
1530         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1531                                 &key, &path, 0, 0);
1532         if (ret < 0)
1533                 goto out;
1534         if (ret > 0 && path.slots[0] > 0) {
1535                 leaf = path.nodes[0];
1536                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1537                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1538                     key.type == BTRFS_EXTENT_CSUM_KEY)
1539                         path.slots[0]--;
1540         }
1541
1542         while (len > 0) {
1543                 leaf = path.nodes[0];
1544                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1545                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1546                         if (ret > 0)
1547                                 break;
1548                         else if (ret < 0)
1549                                 goto out;
1550                         leaf = path.nodes[0];
1551                 }
1552
1553                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1554                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1555                     key.type != BTRFS_EXTENT_CSUM_KEY)
1556                         break;
1557
1558                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1559                 if (key.offset >= start + len)
1560                         break;
1561
1562                 if (key.offset > start)
1563                         start = key.offset;
1564
1565                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1566                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1567                 if (csum_end > start) {
1568                         size = min(csum_end - start, len);
1569                         len -= size;
1570                         start += size;
1571                         *found += size;
1572                 }
1573
1574                 path.slots[0]++;
1575         }
1576 out:
1577         btrfs_release_path(&path);
1578         if (ret < 0)
1579                 return ret;
1580         return 0;
1581 }
1582
1583 static int process_file_extent(struct btrfs_root *root,
1584                                 struct extent_buffer *eb,
1585                                 int slot, struct btrfs_key *key,
1586                                 struct shared_node *active_node)
1587 {
1588         struct inode_record *rec;
1589         struct btrfs_file_extent_item *fi;
1590         u64 num_bytes = 0;
1591         u64 disk_bytenr = 0;
1592         u64 extent_offset = 0;
1593         u64 mask = root->sectorsize - 1;
1594         int extent_type;
1595         int ret;
1596
1597         rec = active_node->current;
1598         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1599         rec->found_file_extent = 1;
1600
1601         if (rec->extent_start == (u64)-1) {
1602                 rec->extent_start = key->offset;
1603                 rec->extent_end = key->offset;
1604         }
1605
1606         if (rec->extent_end > key->offset)
1607                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1608         else if (rec->extent_end < key->offset) {
1609                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1610                                            key->offset - rec->extent_end);
1611                 if (ret < 0)
1612                         return ret;
1613         }
1614
1615         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1616         extent_type = btrfs_file_extent_type(eb, fi);
1617
1618         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1619                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1620                 if (num_bytes == 0)
1621                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1622                 rec->found_size += num_bytes;
1623                 num_bytes = (num_bytes + mask) & ~mask;
1624         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1625                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1626                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1627                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1628                 extent_offset = btrfs_file_extent_offset(eb, fi);
1629                 if (num_bytes == 0 || (num_bytes & mask))
1630                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1631                 if (num_bytes + extent_offset >
1632                     btrfs_file_extent_ram_bytes(eb, fi))
1633                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1634                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1635                     (btrfs_file_extent_compression(eb, fi) ||
1636                      btrfs_file_extent_encryption(eb, fi) ||
1637                      btrfs_file_extent_other_encoding(eb, fi)))
1638                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1639                 if (disk_bytenr > 0)
1640                         rec->found_size += num_bytes;
1641         } else {
1642                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1643         }
1644         rec->extent_end = key->offset + num_bytes;
1645
1646         /*
1647          * The data reloc tree will copy full extents into its inode and then
1648          * copy the corresponding csums.  Because the extent it copied could be
1649          * a preallocated extent that hasn't been written to yet there may be no
1650          * csums to copy, ergo we won't have csums for our file extent.  This is
1651          * ok so just don't bother checking csums if the inode belongs to the
1652          * data reloc tree.
1653          */
1654         if (disk_bytenr > 0 &&
1655             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1656                 u64 found;
1657                 if (btrfs_file_extent_compression(eb, fi))
1658                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1659                 else
1660                         disk_bytenr += extent_offset;
1661
1662                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1663                 if (ret < 0)
1664                         return ret;
1665                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1666                         if (found > 0)
1667                                 rec->found_csum_item = 1;
1668                         if (found < num_bytes)
1669                                 rec->some_csum_missing = 1;
1670                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1671                         if (found > 0)
1672                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1673                 }
1674         }
1675         return 0;
1676 }
1677
1678 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1679                             struct walk_control *wc)
1680 {
1681         struct btrfs_key key;
1682         u32 nritems;
1683         int i;
1684         int ret = 0;
1685         struct cache_tree *inode_cache;
1686         struct shared_node *active_node;
1687
1688         if (wc->root_level == wc->active_node &&
1689             btrfs_root_refs(&root->root_item) == 0)
1690                 return 0;
1691
1692         active_node = wc->nodes[wc->active_node];
1693         inode_cache = &active_node->inode_cache;
1694         nritems = btrfs_header_nritems(eb);
1695         for (i = 0; i < nritems; i++) {
1696                 btrfs_item_key_to_cpu(eb, &key, i);
1697
1698                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1699                         continue;
1700                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1701                         continue;
1702
1703                 if (active_node->current == NULL ||
1704                     active_node->current->ino < key.objectid) {
1705                         if (active_node->current) {
1706                                 active_node->current->checked = 1;
1707                                 maybe_free_inode_rec(inode_cache,
1708                                                      active_node->current);
1709                         }
1710                         active_node->current = get_inode_rec(inode_cache,
1711                                                              key.objectid, 1);
1712                         BUG_ON(IS_ERR(active_node->current));
1713                 }
1714                 switch (key.type) {
1715                 case BTRFS_DIR_ITEM_KEY:
1716                 case BTRFS_DIR_INDEX_KEY:
1717                         ret = process_dir_item(root, eb, i, &key, active_node);
1718                         break;
1719                 case BTRFS_INODE_REF_KEY:
1720                         ret = process_inode_ref(eb, i, &key, active_node);
1721                         break;
1722                 case BTRFS_INODE_EXTREF_KEY:
1723                         ret = process_inode_extref(eb, i, &key, active_node);
1724                         break;
1725                 case BTRFS_INODE_ITEM_KEY:
1726                         ret = process_inode_item(eb, i, &key, active_node);
1727                         break;
1728                 case BTRFS_EXTENT_DATA_KEY:
1729                         ret = process_file_extent(root, eb, i, &key,
1730                                                   active_node);
1731                         break;
1732                 default:
1733                         break;
1734                 };
1735         }
1736         return ret;
1737 }
1738
1739 static void reada_walk_down(struct btrfs_root *root,
1740                             struct extent_buffer *node, int slot)
1741 {
1742         u64 bytenr;
1743         u64 ptr_gen;
1744         u32 nritems;
1745         u32 blocksize;
1746         int i;
1747         int level;
1748
1749         level = btrfs_header_level(node);
1750         if (level != 1)
1751                 return;
1752
1753         nritems = btrfs_header_nritems(node);
1754         blocksize = btrfs_level_size(root, level - 1);
1755         for (i = slot; i < nritems; i++) {
1756                 bytenr = btrfs_node_blockptr(node, i);
1757                 ptr_gen = btrfs_node_ptr_generation(node, i);
1758                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1759         }
1760 }
1761
1762 /*
1763  * Check the child node/leaf by the following condition:
1764  * 1. the first item key of the node/leaf should be the same with the one
1765  *    in parent.
1766  * 2. block in parent node should match the child node/leaf.
1767  * 3. generation of parent node and child's header should be consistent.
1768  *
1769  * Or the child node/leaf pointed by the key in parent is not valid.
1770  *
1771  * We hope to check leaf owner too, but since subvol may share leaves,
1772  * which makes leaf owner check not so strong, key check should be
1773  * sufficient enough for that case.
1774  */
1775 static int check_child_node(struct btrfs_root *root,
1776                             struct extent_buffer *parent, int slot,
1777                             struct extent_buffer *child)
1778 {
1779         struct btrfs_key parent_key;
1780         struct btrfs_key child_key;
1781         int ret = 0;
1782
1783         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1784         if (btrfs_header_level(child) == 0)
1785                 btrfs_item_key_to_cpu(child, &child_key, 0);
1786         else
1787                 btrfs_node_key_to_cpu(child, &child_key, 0);
1788
1789         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1790                 ret = -EINVAL;
1791                 fprintf(stderr,
1792                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1793                         parent_key.objectid, parent_key.type, parent_key.offset,
1794                         child_key.objectid, child_key.type, child_key.offset);
1795         }
1796         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1797                 ret = -EINVAL;
1798                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1799                         btrfs_node_blockptr(parent, slot),
1800                         btrfs_header_bytenr(child));
1801         }
1802         if (btrfs_node_ptr_generation(parent, slot) !=
1803             btrfs_header_generation(child)) {
1804                 ret = -EINVAL;
1805                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1806                         btrfs_header_generation(child),
1807                         btrfs_node_ptr_generation(parent, slot));
1808         }
1809         return ret;
1810 }
1811
1812 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1813                           struct walk_control *wc, int *level)
1814 {
1815         enum btrfs_tree_block_status status;
1816         u64 bytenr;
1817         u64 ptr_gen;
1818         struct extent_buffer *next;
1819         struct extent_buffer *cur;
1820         u32 blocksize;
1821         int ret, err = 0;
1822         u64 refs;
1823
1824         WARN_ON(*level < 0);
1825         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1826         ret = btrfs_lookup_extent_info(NULL, root,
1827                                        path->nodes[*level]->start,
1828                                        *level, 1, &refs, NULL);
1829         if (ret < 0) {
1830                 err = ret;
1831                 goto out;
1832         }
1833
1834         if (refs > 1) {
1835                 ret = enter_shared_node(root, path->nodes[*level]->start,
1836                                         refs, wc, *level);
1837                 if (ret > 0) {
1838                         err = ret;
1839                         goto out;
1840                 }
1841         }
1842
1843         while (*level >= 0) {
1844                 WARN_ON(*level < 0);
1845                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1846                 cur = path->nodes[*level];
1847
1848                 if (btrfs_header_level(cur) != *level)
1849                         WARN_ON(1);
1850
1851                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1852                         break;
1853                 if (*level == 0) {
1854                         ret = process_one_leaf(root, cur, wc);
1855                         if (ret < 0)
1856                                 err = ret;
1857                         break;
1858                 }
1859                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1860                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1861                 blocksize = btrfs_level_size(root, *level - 1);
1862                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1863                                                1, &refs, NULL);
1864                 if (ret < 0)
1865                         refs = 0;
1866
1867                 if (refs > 1) {
1868                         ret = enter_shared_node(root, bytenr, refs,
1869                                                 wc, *level - 1);
1870                         if (ret > 0) {
1871                                 path->slots[*level]++;
1872                                 continue;
1873                         }
1874                 }
1875
1876                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1877                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1878                         free_extent_buffer(next);
1879                         reada_walk_down(root, cur, path->slots[*level]);
1880                         next = read_tree_block(root, bytenr, blocksize,
1881                                                ptr_gen);
1882                         if (!extent_buffer_uptodate(next)) {
1883                                 struct btrfs_key node_key;
1884
1885                                 btrfs_node_key_to_cpu(path->nodes[*level],
1886                                                       &node_key,
1887                                                       path->slots[*level]);
1888                                 btrfs_add_corrupt_extent_record(root->fs_info,
1889                                                 &node_key,
1890                                                 path->nodes[*level]->start,
1891                                                 root->leafsize, *level);
1892                                 err = -EIO;
1893                                 goto out;
1894                         }
1895                 }
1896
1897                 ret = check_child_node(root, cur, path->slots[*level], next);
1898                 if (ret) {
1899                         err = ret;
1900                         goto out;
1901                 }
1902
1903                 if (btrfs_is_leaf(next))
1904                         status = btrfs_check_leaf(root, NULL, next);
1905                 else
1906                         status = btrfs_check_node(root, NULL, next);
1907                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1908                         free_extent_buffer(next);
1909                         err = -EIO;
1910                         goto out;
1911                 }
1912
1913                 *level = *level - 1;
1914                 free_extent_buffer(path->nodes[*level]);
1915                 path->nodes[*level] = next;
1916                 path->slots[*level] = 0;
1917         }
1918 out:
1919         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1920         return err;
1921 }
1922
1923 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1924                         struct walk_control *wc, int *level)
1925 {
1926         int i;
1927         struct extent_buffer *leaf;
1928
1929         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1930                 leaf = path->nodes[i];
1931                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1932                         path->slots[i]++;
1933                         *level = i;
1934                         return 0;
1935                 } else {
1936                         free_extent_buffer(path->nodes[*level]);
1937                         path->nodes[*level] = NULL;
1938                         BUG_ON(*level > wc->active_node);
1939                         if (*level == wc->active_node)
1940                                 leave_shared_node(root, wc, *level);
1941                         *level = i + 1;
1942                 }
1943         }
1944         return 1;
1945 }
1946
1947 static int check_root_dir(struct inode_record *rec)
1948 {
1949         struct inode_backref *backref;
1950         int ret = -1;
1951
1952         if (!rec->found_inode_item || rec->errors)
1953                 goto out;
1954         if (rec->nlink != 1 || rec->found_link != 0)
1955                 goto out;
1956         if (list_empty(&rec->backrefs))
1957                 goto out;
1958         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1959         if (!backref->found_inode_ref)
1960                 goto out;
1961         if (backref->index != 0 || backref->namelen != 2 ||
1962             memcmp(backref->name, "..", 2))
1963                 goto out;
1964         if (backref->found_dir_index || backref->found_dir_item)
1965                 goto out;
1966         ret = 0;
1967 out:
1968         return ret;
1969 }
1970
1971 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1972                               struct btrfs_root *root, struct btrfs_path *path,
1973                               struct inode_record *rec)
1974 {
1975         struct btrfs_inode_item *ei;
1976         struct btrfs_key key;
1977         int ret;
1978
1979         key.objectid = rec->ino;
1980         key.type = BTRFS_INODE_ITEM_KEY;
1981         key.offset = (u64)-1;
1982
1983         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1984         if (ret < 0)
1985                 goto out;
1986         if (ret) {
1987                 if (!path->slots[0]) {
1988                         ret = -ENOENT;
1989                         goto out;
1990                 }
1991                 path->slots[0]--;
1992                 ret = 0;
1993         }
1994         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1995         if (key.objectid != rec->ino) {
1996                 ret = -ENOENT;
1997                 goto out;
1998         }
1999
2000         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2001                             struct btrfs_inode_item);
2002         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2003         btrfs_mark_buffer_dirty(path->nodes[0]);
2004         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2005         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2006                root->root_key.objectid);
2007 out:
2008         btrfs_release_path(path);
2009         return ret;
2010 }
2011
2012 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2013                                     struct btrfs_root *root,
2014                                     struct btrfs_path *path,
2015                                     struct inode_record *rec)
2016 {
2017         int ret;
2018
2019         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2020         btrfs_release_path(path);
2021         if (!ret)
2022                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2023         return ret;
2024 }
2025
2026 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2027                                struct btrfs_root *root,
2028                                struct btrfs_path *path,
2029                                struct inode_record *rec)
2030 {
2031         struct btrfs_inode_item *ei;
2032         struct btrfs_key key;
2033         int ret = 0;
2034
2035         key.objectid = rec->ino;
2036         key.type = BTRFS_INODE_ITEM_KEY;
2037         key.offset = 0;
2038
2039         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2040         if (ret) {
2041                 if (ret > 0)
2042                         ret = -ENOENT;
2043                 goto out;
2044         }
2045
2046         /* Since ret == 0, no need to check anything */
2047         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2048                             struct btrfs_inode_item);
2049         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2050         btrfs_mark_buffer_dirty(path->nodes[0]);
2051         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2052         printf("reset nbytes for ino %llu root %llu\n",
2053                rec->ino, root->root_key.objectid);
2054 out:
2055         btrfs_release_path(path);
2056         return ret;
2057 }
2058
2059 static int add_missing_dir_index(struct btrfs_root *root,
2060                                  struct cache_tree *inode_cache,
2061                                  struct inode_record *rec,
2062                                  struct inode_backref *backref)
2063 {
2064         struct btrfs_path *path;
2065         struct btrfs_trans_handle *trans;
2066         struct btrfs_dir_item *dir_item;
2067         struct extent_buffer *leaf;
2068         struct btrfs_key key;
2069         struct btrfs_disk_key disk_key;
2070         struct inode_record *dir_rec;
2071         unsigned long name_ptr;
2072         u32 data_size = sizeof(*dir_item) + backref->namelen;
2073         int ret;
2074
2075         path = btrfs_alloc_path();
2076         if (!path)
2077                 return -ENOMEM;
2078
2079         trans = btrfs_start_transaction(root, 1);
2080         if (IS_ERR(trans)) {
2081                 btrfs_free_path(path);
2082                 return PTR_ERR(trans);
2083         }
2084
2085         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2086                 (unsigned long long)rec->ino);
2087         key.objectid = backref->dir;
2088         key.type = BTRFS_DIR_INDEX_KEY;
2089         key.offset = backref->index;
2090
2091         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2092         BUG_ON(ret);
2093
2094         leaf = path->nodes[0];
2095         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2096
2097         disk_key.objectid = cpu_to_le64(rec->ino);
2098         disk_key.type = BTRFS_INODE_ITEM_KEY;
2099         disk_key.offset = 0;
2100
2101         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2102         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2103         btrfs_set_dir_data_len(leaf, dir_item, 0);
2104         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2105         name_ptr = (unsigned long)(dir_item + 1);
2106         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2107         btrfs_mark_buffer_dirty(leaf);
2108         btrfs_free_path(path);
2109         btrfs_commit_transaction(trans, root);
2110
2111         backref->found_dir_index = 1;
2112         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2113         BUG_ON(IS_ERR(dir_rec));
2114         if (!dir_rec)
2115                 return 0;
2116         dir_rec->found_size += backref->namelen;
2117         if (dir_rec->found_size == dir_rec->isize &&
2118             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2119                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2120         if (dir_rec->found_size != dir_rec->isize)
2121                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2122
2123         return 0;
2124 }
2125
2126 static int delete_dir_index(struct btrfs_root *root,
2127                             struct cache_tree *inode_cache,
2128                             struct inode_record *rec,
2129                             struct inode_backref *backref)
2130 {
2131         struct btrfs_trans_handle *trans;
2132         struct btrfs_dir_item *di;
2133         struct btrfs_path *path;
2134         int ret = 0;
2135
2136         path = btrfs_alloc_path();
2137         if (!path)
2138                 return -ENOMEM;
2139
2140         trans = btrfs_start_transaction(root, 1);
2141         if (IS_ERR(trans)) {
2142                 btrfs_free_path(path);
2143                 return PTR_ERR(trans);
2144         }
2145
2146
2147         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2148                 (unsigned long long)backref->dir,
2149                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2150                 (unsigned long long)root->objectid);
2151
2152         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2153                                     backref->name, backref->namelen,
2154                                     backref->index, -1);
2155         if (IS_ERR(di)) {
2156                 ret = PTR_ERR(di);
2157                 btrfs_free_path(path);
2158                 btrfs_commit_transaction(trans, root);
2159                 if (ret == -ENOENT)
2160                         return 0;
2161                 return ret;
2162         }
2163
2164         if (!di)
2165                 ret = btrfs_del_item(trans, root, path);
2166         else
2167                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2168         BUG_ON(ret);
2169         btrfs_free_path(path);
2170         btrfs_commit_transaction(trans, root);
2171         return ret;
2172 }
2173
2174 static int create_inode_item(struct btrfs_root *root,
2175                              struct inode_record *rec,
2176                              struct inode_backref *backref, int root_dir)
2177 {
2178         struct btrfs_trans_handle *trans;
2179         struct btrfs_inode_item inode_item;
2180         time_t now = time(NULL);
2181         int ret;
2182
2183         trans = btrfs_start_transaction(root, 1);
2184         if (IS_ERR(trans)) {
2185                 ret = PTR_ERR(trans);
2186                 return ret;
2187         }
2188
2189         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2190                 "be incomplete, please check permissions and content after "
2191                 "the fsck completes.\n", (unsigned long long)root->objectid,
2192                 (unsigned long long)rec->ino);
2193
2194         memset(&inode_item, 0, sizeof(inode_item));
2195         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2196         if (root_dir)
2197                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2198         else
2199                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2200         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2201         if (rec->found_dir_item) {
2202                 if (rec->found_file_extent)
2203                         fprintf(stderr, "root %llu inode %llu has both a dir "
2204                                 "item and extents, unsure if it is a dir or a "
2205                                 "regular file so setting it as a directory\n",
2206                                 (unsigned long long)root->objectid,
2207                                 (unsigned long long)rec->ino);
2208                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2209                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2210         } else if (!rec->found_dir_item) {
2211                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2212                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2213         }
2214         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2215         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2216         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2217         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2218         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2219         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2220         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2221         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2222
2223         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2224         BUG_ON(ret);
2225         btrfs_commit_transaction(trans, root);
2226         return 0;
2227 }
2228
2229 static int repair_inode_backrefs(struct btrfs_root *root,
2230                                  struct inode_record *rec,
2231                                  struct cache_tree *inode_cache,
2232                                  int delete)
2233 {
2234         struct inode_backref *tmp, *backref;
2235         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2236         int ret = 0;
2237         int repaired = 0;
2238
2239         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2240                 if (!delete && rec->ino == root_dirid) {
2241                         if (!rec->found_inode_item) {
2242                                 ret = create_inode_item(root, rec, backref, 1);
2243                                 if (ret)
2244                                         break;
2245                                 repaired++;
2246                         }
2247                 }
2248
2249                 /* Index 0 for root dir's are special, don't mess with it */
2250                 if (rec->ino == root_dirid && backref->index == 0)
2251                         continue;
2252
2253                 if (delete &&
2254                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2255                      (backref->found_dir_index && backref->found_inode_ref &&
2256                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2257                         ret = delete_dir_index(root, inode_cache, rec, backref);
2258                         if (ret)
2259                                 break;
2260                         repaired++;
2261                         list_del(&backref->list);
2262                         free(backref);
2263                 }
2264
2265                 if (!delete && !backref->found_dir_index &&
2266                     backref->found_dir_item && backref->found_inode_ref) {
2267                         ret = add_missing_dir_index(root, inode_cache, rec,
2268                                                     backref);
2269                         if (ret)
2270                                 break;
2271                         repaired++;
2272                         if (backref->found_dir_item &&
2273                             backref->found_dir_index &&
2274                             backref->found_dir_index) {
2275                                 if (!backref->errors &&
2276                                     backref->found_inode_ref) {
2277                                         list_del(&backref->list);
2278                                         free(backref);
2279                                 }
2280                         }
2281                 }
2282
2283                 if (!delete && (!backref->found_dir_index &&
2284                                 !backref->found_dir_item &&
2285                                 backref->found_inode_ref)) {
2286                         struct btrfs_trans_handle *trans;
2287                         struct btrfs_key location;
2288
2289                         ret = check_dir_conflict(root, backref->name,
2290                                                  backref->namelen,
2291                                                  backref->dir,
2292                                                  backref->index);
2293                         if (ret) {
2294                                 /*
2295                                  * let nlink fixing routine to handle it,
2296                                  * which can do it better.
2297                                  */
2298                                 ret = 0;
2299                                 break;
2300                         }
2301                         location.objectid = rec->ino;
2302                         location.type = BTRFS_INODE_ITEM_KEY;
2303                         location.offset = 0;
2304
2305                         trans = btrfs_start_transaction(root, 1);
2306                         if (IS_ERR(trans)) {
2307                                 ret = PTR_ERR(trans);
2308                                 break;
2309                         }
2310                         fprintf(stderr, "adding missing dir index/item pair "
2311                                 "for inode %llu\n",
2312                                 (unsigned long long)rec->ino);
2313                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2314                                                     backref->namelen,
2315                                                     backref->dir, &location,
2316                                                     imode_to_type(rec->imode),
2317                                                     backref->index);
2318                         BUG_ON(ret);
2319                         btrfs_commit_transaction(trans, root);
2320                         repaired++;
2321                 }
2322
2323                 if (!delete && (backref->found_inode_ref &&
2324                                 backref->found_dir_index &&
2325                                 backref->found_dir_item &&
2326                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2327                                 !rec->found_inode_item)) {
2328                         ret = create_inode_item(root, rec, backref, 0);
2329                         if (ret)
2330                                 break;
2331                         repaired++;
2332                 }
2333
2334         }
2335         return ret ? ret : repaired;
2336 }
2337
2338 /*
2339  * To determine the file type for nlink/inode_item repair
2340  *
2341  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2342  * Return -ENOENT if file type is not found.
2343  */
2344 static int find_file_type(struct inode_record *rec, u8 *type)
2345 {
2346         struct inode_backref *backref;
2347
2348         /* For inode item recovered case */
2349         if (rec->found_inode_item) {
2350                 *type = imode_to_type(rec->imode);
2351                 return 0;
2352         }
2353
2354         list_for_each_entry(backref, &rec->backrefs, list) {
2355                 if (backref->found_dir_index || backref->found_dir_item) {
2356                         *type = backref->filetype;
2357                         return 0;
2358                 }
2359         }
2360         return -ENOENT;
2361 }
2362
2363 /*
2364  * To determine the file name for nlink repair
2365  *
2366  * Return 0 if file name is found, set name and namelen.
2367  * Return -ENOENT if file name is not found.
2368  */
2369 static int find_file_name(struct inode_record *rec,
2370                           char *name, int *namelen)
2371 {
2372         struct inode_backref *backref;
2373
2374         list_for_each_entry(backref, &rec->backrefs, list) {
2375                 if (backref->found_dir_index || backref->found_dir_item ||
2376                     backref->found_inode_ref) {
2377                         memcpy(name, backref->name, backref->namelen);
2378                         *namelen = backref->namelen;
2379                         return 0;
2380                 }
2381         }
2382         return -ENOENT;
2383 }
2384
2385 /* Reset the nlink of the inode to the correct one */
2386 static int reset_nlink(struct btrfs_trans_handle *trans,
2387                        struct btrfs_root *root,
2388                        struct btrfs_path *path,
2389                        struct inode_record *rec)
2390 {
2391         struct inode_backref *backref;
2392         struct inode_backref *tmp;
2393         struct btrfs_key key;
2394         struct btrfs_inode_item *inode_item;
2395         int ret = 0;
2396
2397         /* We don't believe this either, reset it and iterate backref */
2398         rec->found_link = 0;
2399
2400         /* Remove all backref including the valid ones */
2401         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2402                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2403                                    backref->index, backref->name,
2404                                    backref->namelen, 0);
2405                 if (ret < 0)
2406                         goto out;
2407
2408                 /* remove invalid backref, so it won't be added back */
2409                 if (!(backref->found_dir_index &&
2410                       backref->found_dir_item &&
2411                       backref->found_inode_ref)) {
2412                         list_del(&backref->list);
2413                         free(backref);
2414                 } else {
2415                         rec->found_link++;
2416                 }
2417         }
2418
2419         /* Set nlink to 0 */
2420         key.objectid = rec->ino;
2421         key.type = BTRFS_INODE_ITEM_KEY;
2422         key.offset = 0;
2423         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2424         if (ret < 0)
2425                 goto out;
2426         if (ret > 0) {
2427                 ret = -ENOENT;
2428                 goto out;
2429         }
2430         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2431                                     struct btrfs_inode_item);
2432         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2433         btrfs_mark_buffer_dirty(path->nodes[0]);
2434         btrfs_release_path(path);
2435
2436         /*
2437          * Add back valid inode_ref/dir_item/dir_index,
2438          * add_link() will handle the nlink inc, so new nlink must be correct
2439          */
2440         list_for_each_entry(backref, &rec->backrefs, list) {
2441                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2442                                      backref->name, backref->namelen,
2443                                      backref->filetype, &backref->index, 1);
2444                 if (ret < 0)
2445                         goto out;
2446         }
2447 out:
2448         btrfs_release_path(path);
2449         return ret;
2450 }
2451
2452 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2453                                struct btrfs_root *root,
2454                                struct btrfs_path *path,
2455                                struct inode_record *rec)
2456 {
2457         char *dir_name = "lost+found";
2458         char namebuf[BTRFS_NAME_LEN] = {0};
2459         u64 lost_found_ino;
2460         u32 mode = 0700;
2461         u8 type = 0;
2462         int namelen = 0;
2463         int name_recovered = 0;
2464         int type_recovered = 0;
2465         int ret = 0;
2466
2467         /*
2468          * Get file name and type first before these invalid inode ref
2469          * are deleted by remove_all_invalid_backref()
2470          */
2471         name_recovered = !find_file_name(rec, namebuf, &namelen);
2472         type_recovered = !find_file_type(rec, &type);
2473
2474         if (!name_recovered) {
2475                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2476                        rec->ino, rec->ino);
2477                 namelen = count_digits(rec->ino);
2478                 sprintf(namebuf, "%llu", rec->ino);
2479                 name_recovered = 1;
2480         }
2481         if (!type_recovered) {
2482                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2483                        rec->ino);
2484                 type = BTRFS_FT_REG_FILE;
2485                 type_recovered = 1;
2486         }
2487
2488         ret = reset_nlink(trans, root, path, rec);
2489         if (ret < 0) {
2490                 fprintf(stderr,
2491                         "Failed to reset nlink for inode %llu: %s\n",
2492                         rec->ino, strerror(-ret));
2493                 goto out;
2494         }
2495
2496         if (rec->found_link == 0) {
2497                 lost_found_ino = root->highest_inode;
2498                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2499                         ret = -EOVERFLOW;
2500                         goto out;
2501                 }
2502                 lost_found_ino++;
2503                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2504                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2505                                   mode);
2506                 if (ret < 0) {
2507                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2508                                 dir_name, strerror(-ret));
2509                         goto out;
2510                 }
2511                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2512                                      namebuf, namelen, type, NULL, 1);
2513                 /*
2514                  * Add ".INO" suffix several times to handle case where
2515                  * "FILENAME.INO" is already taken by another file.
2516                  */
2517                 while (ret == -EEXIST) {
2518                         /*
2519                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2520                          */
2521                         if (namelen + count_digits(rec->ino) + 1 >
2522                             BTRFS_NAME_LEN) {
2523                                 ret = -EFBIG;
2524                                 goto out;
2525                         }
2526                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2527                                  ".%llu", rec->ino);
2528                         namelen += count_digits(rec->ino) + 1;
2529                         ret = btrfs_add_link(trans, root, rec->ino,
2530                                              lost_found_ino, namebuf,
2531                                              namelen, type, NULL, 1);
2532                 }
2533                 if (ret < 0) {
2534                         fprintf(stderr,
2535                                 "Failed to link the inode %llu to %s dir: %s\n",
2536                                 rec->ino, dir_name, strerror(-ret));
2537                         goto out;
2538                 }
2539                 /*
2540                  * Just increase the found_link, don't actually add the
2541                  * backref. This will make things easier and this inode
2542                  * record will be freed after the repair is done.
2543                  * So fsck will not report problem about this inode.
2544                  */
2545                 rec->found_link++;
2546                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2547                        namelen, namebuf, dir_name);
2548         }
2549         printf("Fixed the nlink of inode %llu\n", rec->ino);
2550 out:
2551         /*
2552          * Clear the flag anyway, or we will loop forever for the same inode
2553          * as it will not be removed from the bad inode list and the dead loop
2554          * happens.
2555          */
2556         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2557         btrfs_release_path(path);
2558         return ret;
2559 }
2560
2561 /*
2562  * Check if there is any normal(reg or prealloc) file extent for given
2563  * ino.
2564  * This is used to determine the file type when neither its dir_index/item or
2565  * inode_item exists.
2566  *
2567  * This will *NOT* report error, if any error happens, just consider it does
2568  * not have any normal file extent.
2569  */
2570 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2571 {
2572         struct btrfs_path *path;
2573         struct btrfs_key key;
2574         struct btrfs_key found_key;
2575         struct btrfs_file_extent_item *fi;
2576         u8 type;
2577         int ret = 0;
2578
2579         path = btrfs_alloc_path();
2580         if (!path)
2581                 goto out;
2582         key.objectid = ino;
2583         key.type = BTRFS_EXTENT_DATA_KEY;
2584         key.offset = 0;
2585
2586         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2587         if (ret < 0) {
2588                 ret = 0;
2589                 goto out;
2590         }
2591         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2592                 ret = btrfs_next_leaf(root, path);
2593                 if (ret) {
2594                         ret = 0;
2595                         goto out;
2596                 }
2597         }
2598         while (1) {
2599                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2600                                       path->slots[0]);
2601                 if (found_key.objectid != ino ||
2602                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2603                         break;
2604                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2605                                     struct btrfs_file_extent_item);
2606                 type = btrfs_file_extent_type(path->nodes[0], fi);
2607                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2608                         ret = 1;
2609                         goto out;
2610                 }
2611         }
2612 out:
2613         btrfs_free_path(path);
2614         return ret;
2615 }
2616
2617 static u32 btrfs_type_to_imode(u8 type)
2618 {
2619         static u32 imode_by_btrfs_type[] = {
2620                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2621                 [BTRFS_FT_DIR]          = S_IFDIR,
2622                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2623                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2624                 [BTRFS_FT_FIFO]         = S_IFIFO,
2625                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2626                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2627         };
2628
2629         return imode_by_btrfs_type[(type)];
2630 }
2631
2632 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2633                                 struct btrfs_root *root,
2634                                 struct btrfs_path *path,
2635                                 struct inode_record *rec)
2636 {
2637         u8 filetype;
2638         u32 mode = 0700;
2639         int type_recovered = 0;
2640         int ret = 0;
2641
2642         printf("Trying to rebuild inode:%llu\n", rec->ino);
2643
2644         type_recovered = !find_file_type(rec, &filetype);
2645
2646         /*
2647          * Try to determine inode type if type not found.
2648          *
2649          * For found regular file extent, it must be FILE.
2650          * For found dir_item/index, it must be DIR.
2651          *
2652          * For undetermined one, use FILE as fallback.
2653          *
2654          * TODO:
2655          * 1. If found backref(inode_index/item is already handled) to it,
2656          *    it must be DIR.
2657          *    Need new inode-inode ref structure to allow search for that.
2658          */
2659         if (!type_recovered) {
2660                 if (rec->found_file_extent &&
2661                     find_normal_file_extent(root, rec->ino)) {
2662                         type_recovered = 1;
2663                         filetype = BTRFS_FT_REG_FILE;
2664                 } else if (rec->found_dir_item) {
2665                         type_recovered = 1;
2666                         filetype = BTRFS_FT_DIR;
2667                 } else if (!list_empty(&rec->orphan_extents)) {
2668                         type_recovered = 1;
2669                         filetype = BTRFS_FT_REG_FILE;
2670                 } else{
2671                         printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2672                                rec->ino);
2673                         type_recovered = 1;
2674                         filetype = BTRFS_FT_REG_FILE;
2675                 }
2676         }
2677
2678         ret = btrfs_new_inode(trans, root, rec->ino,
2679                               mode | btrfs_type_to_imode(filetype));
2680         if (ret < 0)
2681                 goto out;
2682
2683         /*
2684          * Here inode rebuild is done, we only rebuild the inode item,
2685          * don't repair the nlink(like move to lost+found).
2686          * That is the job of nlink repair.
2687          *
2688          * We just fill the record and return
2689          */
2690         rec->found_dir_item = 1;
2691         rec->imode = mode | btrfs_type_to_imode(filetype);
2692         rec->nlink = 0;
2693         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2694         /* Ensure the inode_nlinks repair function will be called */
2695         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2696 out:
2697         return ret;
2698 }
2699
2700 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2701                                       struct btrfs_root *root,
2702                                       struct btrfs_path *path,
2703                                       struct inode_record *rec)
2704 {
2705         struct orphan_data_extent *orphan;
2706         struct orphan_data_extent *tmp;
2707         int ret = 0;
2708
2709         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2710                 /*
2711                  * Check for conflicting file extents
2712                  *
2713                  * Here we don't know whether the extents is compressed or not,
2714                  * so we can only assume it not compressed nor data offset,
2715                  * and use its disk_len as extent length.
2716                  */
2717                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2718                                        orphan->offset, orphan->disk_len, 0);
2719                 btrfs_release_path(path);
2720                 if (ret < 0)
2721                         goto out;
2722                 if (!ret) {
2723                         fprintf(stderr,
2724                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2725                                 orphan->disk_bytenr, orphan->disk_len);
2726                         ret = btrfs_free_extent(trans,
2727                                         root->fs_info->extent_root,
2728                                         orphan->disk_bytenr, orphan->disk_len,
2729                                         0, root->objectid, orphan->objectid,
2730                                         orphan->offset);
2731                         if (ret < 0)
2732                                 goto out;
2733                 }
2734                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2735                                 orphan->offset, orphan->disk_bytenr,
2736                                 orphan->disk_len, orphan->disk_len);
2737                 if (ret < 0)
2738                         goto out;
2739
2740                 /* Update file size info */
2741                 rec->found_size += orphan->disk_len;
2742                 if (rec->found_size == rec->nbytes)
2743                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2744
2745                 /* Update the file extent hole info too */
2746                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2747                                            orphan->disk_len);
2748                 if (ret < 0)
2749                         goto out;
2750                 if (RB_EMPTY_ROOT(&rec->holes))
2751                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2752
2753                 list_del(&orphan->list);
2754                 free(orphan);
2755         }
2756         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2757 out:
2758         return ret;
2759 }
2760
2761 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2762                                         struct btrfs_root *root,
2763                                         struct btrfs_path *path,
2764                                         struct inode_record *rec)
2765 {
2766         struct rb_node *node;
2767         struct file_extent_hole *hole;
2768         int found = 0;
2769         int ret = 0;
2770
2771         node = rb_first(&rec->holes);
2772
2773         while (node) {
2774                 found = 1;
2775                 hole = rb_entry(node, struct file_extent_hole, node);
2776                 ret = btrfs_punch_hole(trans, root, rec->ino,
2777                                        hole->start, hole->len);
2778                 if (ret < 0)
2779                         goto out;
2780                 ret = del_file_extent_hole(&rec->holes, hole->start,
2781                                            hole->len);
2782                 if (ret < 0)
2783                         goto out;
2784                 if (RB_EMPTY_ROOT(&rec->holes))
2785                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2786                 node = rb_first(&rec->holes);
2787         }
2788         /* special case for a file losing all its file extent */
2789         if (!found) {
2790                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2791                                        round_up(rec->isize, root->sectorsize));
2792                 if (ret < 0)
2793                         goto out;
2794         }
2795         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2796                rec->ino, root->objectid);
2797 out:
2798         return ret;
2799 }
2800
2801 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2802 {
2803         struct btrfs_trans_handle *trans;
2804         struct btrfs_path *path;
2805         int ret = 0;
2806
2807         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2808                              I_ERR_NO_ORPHAN_ITEM |
2809                              I_ERR_LINK_COUNT_WRONG |
2810                              I_ERR_NO_INODE_ITEM |
2811                              I_ERR_FILE_EXTENT_ORPHAN |
2812                              I_ERR_FILE_EXTENT_DISCOUNT|
2813                              I_ERR_FILE_NBYTES_WRONG)))
2814                 return rec->errors;
2815
2816         path = btrfs_alloc_path();
2817         if (!path)
2818                 return -ENOMEM;
2819
2820         /*
2821          * For nlink repair, it may create a dir and add link, so
2822          * 2 for parent(256)'s dir_index and dir_item
2823          * 2 for lost+found dir's inode_item and inode_ref
2824          * 1 for the new inode_ref of the file
2825          * 2 for lost+found dir's dir_index and dir_item for the file
2826          */
2827         trans = btrfs_start_transaction(root, 7);
2828         if (IS_ERR(trans)) {
2829                 btrfs_free_path(path);
2830                 return PTR_ERR(trans);
2831         }
2832
2833         if (rec->errors & I_ERR_NO_INODE_ITEM)
2834                 ret = repair_inode_no_item(trans, root, path, rec);
2835         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2836                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2837         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2838                 ret = repair_inode_discount_extent(trans, root, path, rec);
2839         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2840                 ret = repair_inode_isize(trans, root, path, rec);
2841         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2842                 ret = repair_inode_orphan_item(trans, root, path, rec);
2843         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2844                 ret = repair_inode_nlinks(trans, root, path, rec);
2845         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2846                 ret = repair_inode_nbytes(trans, root, path, rec);
2847         btrfs_commit_transaction(trans, root);
2848         btrfs_free_path(path);
2849         return ret;
2850 }
2851
2852 static int check_inode_recs(struct btrfs_root *root,
2853                             struct cache_tree *inode_cache)
2854 {
2855         struct cache_extent *cache;
2856         struct ptr_node *node;
2857         struct inode_record *rec;
2858         struct inode_backref *backref;
2859         int stage = 0;
2860         int ret = 0;
2861         int err = 0;
2862         u64 error = 0;
2863         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2864
2865         if (btrfs_root_refs(&root->root_item) == 0) {
2866                 if (!cache_tree_empty(inode_cache))
2867                         fprintf(stderr, "warning line %d\n", __LINE__);
2868                 return 0;
2869         }
2870
2871         /*
2872          * We need to record the highest inode number for later 'lost+found'
2873          * dir creation.
2874          * We must select a ino not used/refered by any existing inode, or
2875          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2876          * this may cause 'lost+found' dir has wrong nlinks.
2877          */
2878         cache = last_cache_extent(inode_cache);
2879         if (cache) {
2880                 node = container_of(cache, struct ptr_node, cache);
2881                 rec = node->data;
2882                 if (rec->ino > root->highest_inode)
2883                         root->highest_inode = rec->ino;
2884         }
2885
2886         /*
2887          * We need to repair backrefs first because we could change some of the
2888          * errors in the inode recs.
2889          *
2890          * We also need to go through and delete invalid backrefs first and then
2891          * add the correct ones second.  We do this because we may get EEXIST
2892          * when adding back the correct index because we hadn't yet deleted the
2893          * invalid index.
2894          *
2895          * For example, if we were missing a dir index then the directories
2896          * isize would be wrong, so if we fixed the isize to what we thought it
2897          * would be and then fixed the backref we'd still have a invalid fs, so
2898          * we need to add back the dir index and then check to see if the isize
2899          * is still wrong.
2900          */
2901         while (stage < 3) {
2902                 stage++;
2903                 if (stage == 3 && !err)
2904                         break;
2905
2906                 cache = search_cache_extent(inode_cache, 0);
2907                 while (repair && cache) {
2908                         node = container_of(cache, struct ptr_node, cache);
2909                         rec = node->data;
2910                         cache = next_cache_extent(cache);
2911
2912                         /* Need to free everything up and rescan */
2913                         if (stage == 3) {
2914                                 remove_cache_extent(inode_cache, &node->cache);
2915                                 free(node);
2916                                 free_inode_rec(rec);
2917                                 continue;
2918                         }
2919
2920                         if (list_empty(&rec->backrefs))
2921                                 continue;
2922
2923                         ret = repair_inode_backrefs(root, rec, inode_cache,
2924                                                     stage == 1);
2925                         if (ret < 0) {
2926                                 err = ret;
2927                                 stage = 2;
2928                                 break;
2929                         } if (ret > 0) {
2930                                 err = -EAGAIN;
2931                         }
2932                 }
2933         }
2934         if (err)
2935                 return err;
2936
2937         rec = get_inode_rec(inode_cache, root_dirid, 0);
2938         BUG_ON(IS_ERR(rec));
2939         if (rec) {
2940                 ret = check_root_dir(rec);
2941                 if (ret) {
2942                         fprintf(stderr, "root %llu root dir %llu error\n",
2943                                 (unsigned long long)root->root_key.objectid,
2944                                 (unsigned long long)root_dirid);
2945                         print_inode_error(root, rec);
2946                         error++;
2947                 }
2948         } else {
2949                 if (repair) {
2950                         struct btrfs_trans_handle *trans;
2951
2952                         trans = btrfs_start_transaction(root, 1);
2953                         if (IS_ERR(trans)) {
2954                                 err = PTR_ERR(trans);
2955                                 return err;
2956                         }
2957
2958                         fprintf(stderr,
2959                                 "root %llu missing its root dir, recreating\n",
2960                                 (unsigned long long)root->objectid);
2961
2962                         ret = btrfs_make_root_dir(trans, root, root_dirid);
2963                         BUG_ON(ret);
2964
2965                         btrfs_commit_transaction(trans, root);
2966                         return -EAGAIN;
2967                 }
2968
2969                 fprintf(stderr, "root %llu root dir %llu not found\n",
2970                         (unsigned long long)root->root_key.objectid,
2971                         (unsigned long long)root_dirid);
2972         }
2973
2974         while (1) {
2975                 cache = search_cache_extent(inode_cache, 0);
2976                 if (!cache)
2977                         break;
2978                 node = container_of(cache, struct ptr_node, cache);
2979                 rec = node->data;
2980                 remove_cache_extent(inode_cache, &node->cache);
2981                 free(node);
2982                 if (rec->ino == root_dirid ||
2983                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
2984                         free_inode_rec(rec);
2985                         continue;
2986                 }
2987
2988                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2989                         ret = check_orphan_item(root, rec->ino);
2990                         if (ret == 0)
2991                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2992                         if (can_free_inode_rec(rec)) {
2993                                 free_inode_rec(rec);
2994                                 continue;
2995                         }
2996                 }
2997
2998                 if (!rec->found_inode_item)
2999                         rec->errors |= I_ERR_NO_INODE_ITEM;
3000                 if (rec->found_link != rec->nlink)
3001                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3002                 if (repair) {
3003                         ret = try_repair_inode(root, rec);
3004                         if (ret == 0 && can_free_inode_rec(rec)) {
3005                                 free_inode_rec(rec);
3006                                 continue;
3007                         }
3008                         ret = 0;
3009                 }
3010
3011                 if (!(repair && ret == 0))
3012                         error++;
3013                 print_inode_error(root, rec);
3014                 list_for_each_entry(backref, &rec->backrefs, list) {
3015                         if (!backref->found_dir_item)
3016                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3017                         if (!backref->found_dir_index)
3018                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3019                         if (!backref->found_inode_ref)
3020                                 backref->errors |= REF_ERR_NO_INODE_REF;
3021                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3022                                 " namelen %u name %s filetype %d errors %x",
3023                                 (unsigned long long)backref->dir,
3024                                 (unsigned long long)backref->index,
3025                                 backref->namelen, backref->name,
3026                                 backref->filetype, backref->errors);
3027                         print_ref_error(backref->errors);
3028                 }
3029                 free_inode_rec(rec);
3030         }
3031         return (error > 0) ? -1 : 0;
3032 }
3033
3034 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3035                                         u64 objectid)
3036 {
3037         struct cache_extent *cache;
3038         struct root_record *rec = NULL;
3039         int ret;
3040
3041         cache = lookup_cache_extent(root_cache, objectid, 1);
3042         if (cache) {
3043                 rec = container_of(cache, struct root_record, cache);
3044         } else {
3045                 rec = calloc(1, sizeof(*rec));
3046                 if (!rec)
3047                         return ERR_PTR(-ENOMEM);
3048                 rec->objectid = objectid;
3049                 INIT_LIST_HEAD(&rec->backrefs);
3050                 rec->cache.start = objectid;
3051                 rec->cache.size = 1;
3052
3053                 ret = insert_cache_extent(root_cache, &rec->cache);
3054                 if (ret)
3055                         return ERR_PTR(-EEXIST);
3056         }
3057         return rec;
3058 }
3059
3060 static struct root_backref *get_root_backref(struct root_record *rec,
3061                                              u64 ref_root, u64 dir, u64 index,
3062                                              const char *name, int namelen)
3063 {
3064         struct root_backref *backref;
3065
3066         list_for_each_entry(backref, &rec->backrefs, list) {
3067                 if (backref->ref_root != ref_root || backref->dir != dir ||
3068                     backref->namelen != namelen)
3069                         continue;
3070                 if (memcmp(name, backref->name, namelen))
3071                         continue;
3072                 return backref;
3073         }
3074
3075         backref = calloc(1, sizeof(*backref) + namelen + 1);
3076         if (!backref)
3077                 return NULL;
3078         backref->ref_root = ref_root;
3079         backref->dir = dir;
3080         backref->index = index;
3081         backref->namelen = namelen;
3082         memcpy(backref->name, name, namelen);
3083         backref->name[namelen] = '\0';
3084         list_add_tail(&backref->list, &rec->backrefs);
3085         return backref;
3086 }
3087
3088 static void free_root_record(struct cache_extent *cache)
3089 {
3090         struct root_record *rec;
3091         struct root_backref *backref;
3092
3093         rec = container_of(cache, struct root_record, cache);
3094         while (!list_empty(&rec->backrefs)) {
3095                 backref = list_entry(rec->backrefs.next,
3096                                      struct root_backref, list);
3097                 list_del(&backref->list);
3098                 free(backref);
3099         }
3100
3101         kfree(rec);
3102 }
3103
3104 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3105
3106 static int add_root_backref(struct cache_tree *root_cache,
3107                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3108                             const char *name, int namelen,
3109                             int item_type, int errors)
3110 {
3111         struct root_record *rec;
3112         struct root_backref *backref;
3113
3114         rec = get_root_rec(root_cache, root_id);
3115         BUG_ON(IS_ERR(rec));
3116         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3117         BUG_ON(!backref);
3118
3119         backref->errors |= errors;
3120
3121         if (item_type != BTRFS_DIR_ITEM_KEY) {
3122                 if (backref->found_dir_index || backref->found_back_ref ||
3123                     backref->found_forward_ref) {
3124                         if (backref->index != index)
3125                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3126                 } else {
3127                         backref->index = index;
3128                 }
3129         }
3130
3131         if (item_type == BTRFS_DIR_ITEM_KEY) {
3132                 if (backref->found_forward_ref)
3133                         rec->found_ref++;
3134                 backref->found_dir_item = 1;
3135         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3136                 backref->found_dir_index = 1;
3137         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3138                 if (backref->found_forward_ref)
3139                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3140                 else if (backref->found_dir_item)
3141                         rec->found_ref++;
3142                 backref->found_forward_ref = 1;
3143         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3144                 if (backref->found_back_ref)
3145                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3146                 backref->found_back_ref = 1;
3147         } else {
3148                 BUG_ON(1);
3149         }
3150
3151         if (backref->found_forward_ref && backref->found_dir_item)
3152                 backref->reachable = 1;
3153         return 0;
3154 }
3155
3156 static int merge_root_recs(struct btrfs_root *root,
3157                            struct cache_tree *src_cache,
3158                            struct cache_tree *dst_cache)
3159 {
3160         struct cache_extent *cache;
3161         struct ptr_node *node;
3162         struct inode_record *rec;
3163         struct inode_backref *backref;
3164         int ret = 0;
3165
3166         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3167                 free_inode_recs_tree(src_cache);
3168                 return 0;
3169         }
3170
3171         while (1) {
3172                 cache = search_cache_extent(src_cache, 0);
3173                 if (!cache)
3174                         break;
3175                 node = container_of(cache, struct ptr_node, cache);
3176                 rec = node->data;
3177                 remove_cache_extent(src_cache, &node->cache);
3178                 free(node);
3179
3180                 ret = is_child_root(root, root->objectid, rec->ino);
3181                 if (ret < 0)
3182                         break;
3183                 else if (ret == 0)
3184                         goto skip;
3185
3186                 list_for_each_entry(backref, &rec->backrefs, list) {
3187                         BUG_ON(backref->found_inode_ref);
3188                         if (backref->found_dir_item)
3189                                 add_root_backref(dst_cache, rec->ino,
3190                                         root->root_key.objectid, backref->dir,
3191                                         backref->index, backref->name,
3192                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3193                                         backref->errors);
3194                         if (backref->found_dir_index)
3195                                 add_root_backref(dst_cache, rec->ino,
3196                                         root->root_key.objectid, backref->dir,
3197                                         backref->index, backref->name,
3198                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3199                                         backref->errors);
3200                 }
3201 skip:
3202                 free_inode_rec(rec);
3203         }
3204         if (ret < 0)
3205                 return ret;
3206         return 0;
3207 }
3208
3209 static int check_root_refs(struct btrfs_root *root,
3210                            struct cache_tree *root_cache)
3211 {
3212         struct root_record *rec;
3213         struct root_record *ref_root;
3214         struct root_backref *backref;
3215         struct cache_extent *cache;
3216         int loop = 1;
3217         int ret;
3218         int error;
3219         int errors = 0;
3220
3221         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3222         BUG_ON(IS_ERR(rec));
3223         rec->found_ref = 1;
3224
3225         /* fixme: this can not detect circular references */
3226         while (loop) {
3227                 loop = 0;
3228                 cache = search_cache_extent(root_cache, 0);
3229                 while (1) {
3230                         if (!cache)
3231                                 break;
3232                         rec = container_of(cache, struct root_record, cache);
3233                         cache = next_cache_extent(cache);
3234
3235                         if (rec->found_ref == 0)
3236                                 continue;
3237
3238                         list_for_each_entry(backref, &rec->backrefs, list) {
3239                                 if (!backref->reachable)
3240                                         continue;
3241
3242                                 ref_root = get_root_rec(root_cache,
3243                                                         backref->ref_root);
3244                                 BUG_ON(IS_ERR(ref_root));
3245                                 if (ref_root->found_ref > 0)
3246                                         continue;
3247
3248                                 backref->reachable = 0;
3249                                 rec->found_ref--;
3250                                 if (rec->found_ref == 0)
3251                                         loop = 1;
3252                         }
3253                 }
3254         }
3255
3256         cache = search_cache_extent(root_cache, 0);
3257         while (1) {
3258                 if (!cache)
3259                         break;
3260                 rec = container_of(cache, struct root_record, cache);
3261                 cache = next_cache_extent(cache);
3262
3263                 if (rec->found_ref == 0 &&
3264                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3265                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3266                         ret = check_orphan_item(root->fs_info->tree_root,
3267                                                 rec->objectid);
3268                         if (ret == 0)
3269                                 continue;
3270
3271                         /*
3272                          * If we don't have a root item then we likely just have
3273                          * a dir item in a snapshot for this root but no actual
3274                          * ref key or anything so it's meaningless.
3275                          */
3276                         if (!rec->found_root_item)
3277                                 continue;
3278                         errors++;
3279                         fprintf(stderr, "fs tree %llu not referenced\n",
3280                                 (unsigned long long)rec->objectid);
3281                 }
3282
3283                 error = 0;
3284                 if (rec->found_ref > 0 && !rec->found_root_item)
3285                         error = 1;
3286                 list_for_each_entry(backref, &rec->backrefs, list) {
3287                         if (!backref->found_dir_item)
3288                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3289                         if (!backref->found_dir_index)
3290                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3291                         if (!backref->found_back_ref)
3292                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3293                         if (!backref->found_forward_ref)
3294                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3295                         if (backref->reachable && backref->errors)
3296                                 error = 1;
3297                 }
3298                 if (!error)
3299                         continue;
3300
3301                 errors++;
3302                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3303                         (unsigned long long)rec->objectid, rec->found_ref,
3304                          rec->found_root_item ? "" : "not found");
3305
3306                 list_for_each_entry(backref, &rec->backrefs, list) {
3307                         if (!backref->reachable)
3308                                 continue;
3309                         if (!backref->errors && rec->found_root_item)
3310                                 continue;
3311                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3312                                 " index %llu namelen %u name %s errors %x\n",
3313                                 (unsigned long long)backref->ref_root,
3314                                 (unsigned long long)backref->dir,
3315                                 (unsigned long long)backref->index,
3316                                 backref->namelen, backref->name,
3317                                 backref->errors);
3318                         print_ref_error(backref->errors);
3319                 }
3320         }
3321         return errors > 0 ? 1 : 0;
3322 }
3323
3324 static int process_root_ref(struct extent_buffer *eb, int slot,
3325                             struct btrfs_key *key,
3326                             struct cache_tree *root_cache)
3327 {
3328         u64 dirid;
3329         u64 index;
3330         u32 len;
3331         u32 name_len;
3332         struct btrfs_root_ref *ref;
3333         char namebuf[BTRFS_NAME_LEN];
3334         int error;
3335
3336         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3337
3338         dirid = btrfs_root_ref_dirid(eb, ref);
3339         index = btrfs_root_ref_sequence(eb, ref);
3340         name_len = btrfs_root_ref_name_len(eb, ref);
3341
3342         if (name_len <= BTRFS_NAME_LEN) {
3343                 len = name_len;
3344                 error = 0;
3345         } else {
3346                 len = BTRFS_NAME_LEN;
3347                 error = REF_ERR_NAME_TOO_LONG;
3348         }
3349         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3350
3351         if (key->type == BTRFS_ROOT_REF_KEY) {
3352                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3353                                  index, namebuf, len, key->type, error);
3354         } else {
3355                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3356                                  index, namebuf, len, key->type, error);
3357         }
3358         return 0;
3359 }
3360
3361 static void free_corrupt_block(struct cache_extent *cache)
3362 {
3363         struct btrfs_corrupt_block *corrupt;
3364
3365         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3366         free(corrupt);
3367 }
3368
3369 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3370
3371 /*
3372  * Repair the btree of the given root.
3373  *
3374  * The fix is to remove the node key in corrupt_blocks cache_tree.
3375  * and rebalance the tree.
3376  * After the fix, the btree should be writeable.
3377  */
3378 static int repair_btree(struct btrfs_root *root,
3379                         struct cache_tree *corrupt_blocks)
3380 {
3381         struct btrfs_trans_handle *trans;
3382         struct btrfs_path *path;
3383         struct btrfs_corrupt_block *corrupt;
3384         struct cache_extent *cache;
3385         struct btrfs_key key;
3386         u64 offset;
3387         int level;
3388         int ret = 0;
3389
3390         if (cache_tree_empty(corrupt_blocks))
3391                 return 0;
3392
3393         path = btrfs_alloc_path();
3394         if (!path)
3395                 return -ENOMEM;
3396
3397         trans = btrfs_start_transaction(root, 1);
3398         if (IS_ERR(trans)) {
3399                 ret = PTR_ERR(trans);
3400                 fprintf(stderr, "Error starting transaction: %s\n",
3401                         strerror(-ret));
3402                 goto out_free_path;
3403         }
3404         cache = first_cache_extent(corrupt_blocks);
3405         while (cache) {
3406                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3407                                        cache);
3408                 level = corrupt->level;
3409                 path->lowest_level = level;
3410                 key.objectid = corrupt->key.objectid;
3411                 key.type = corrupt->key.type;
3412                 key.offset = corrupt->key.offset;
3413
3414                 /*
3415                  * Here we don't want to do any tree balance, since it may
3416                  * cause a balance with corrupted brother leaf/node,
3417                  * so ins_len set to 0 here.
3418                  * Balance will be done after all corrupt node/leaf is deleted.
3419                  */
3420                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3421                 if (ret < 0)
3422                         goto out;
3423                 offset = btrfs_node_blockptr(path->nodes[level],
3424                                              path->slots[level]);
3425
3426                 /* Remove the ptr */
3427                 ret = btrfs_del_ptr(trans, root, path, level,
3428                                     path->slots[level]);
3429                 if (ret < 0)
3430                         goto out;
3431                 /*
3432                  * Remove the corresponding extent
3433                  * return value is not concerned.
3434                  */
3435                 btrfs_release_path(path);
3436                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3437                                         0, root->root_key.objectid,
3438                                         level - 1, 0);
3439                 cache = next_cache_extent(cache);
3440         }
3441
3442         /* Balance the btree using btrfs_search_slot() */
3443         cache = first_cache_extent(corrupt_blocks);
3444         while (cache) {
3445                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3446                                        cache);
3447                 memcpy(&key, &corrupt->key, sizeof(key));
3448                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3449                 if (ret < 0)
3450                         goto out;
3451                 /* return will always >0 since it won't find the item */
3452                 ret = 0;
3453                 btrfs_release_path(path);
3454                 cache = next_cache_extent(cache);
3455         }
3456 out:
3457         btrfs_commit_transaction(trans, root);
3458 out_free_path:
3459         btrfs_free_path(path);
3460         return ret;
3461 }
3462
3463 static int check_fs_root(struct btrfs_root *root,
3464                          struct cache_tree *root_cache,
3465                          struct walk_control *wc)
3466 {
3467         int ret = 0;
3468         int err = 0;
3469         int wret;
3470         int level;
3471         struct btrfs_path path;
3472         struct shared_node root_node;
3473         struct root_record *rec;
3474         struct btrfs_root_item *root_item = &root->root_item;
3475         struct cache_tree corrupt_blocks;
3476         struct orphan_data_extent *orphan;
3477         struct orphan_data_extent *tmp;
3478         enum btrfs_tree_block_status status;
3479
3480         /*
3481          * Reuse the corrupt_block cache tree to record corrupted tree block
3482          *
3483          * Unlike the usage in extent tree check, here we do it in a per
3484          * fs/subvol tree base.
3485          */
3486         cache_tree_init(&corrupt_blocks);
3487         root->fs_info->corrupt_blocks = &corrupt_blocks;
3488
3489         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3490                 rec = get_root_rec(root_cache, root->root_key.objectid);
3491                 BUG_ON(IS_ERR(rec));
3492                 if (btrfs_root_refs(root_item) > 0)
3493                         rec->found_root_item = 1;
3494         }
3495
3496         btrfs_init_path(&path);
3497         memset(&root_node, 0, sizeof(root_node));
3498         cache_tree_init(&root_node.root_cache);
3499         cache_tree_init(&root_node.inode_cache);
3500
3501         /* Move the orphan extent record to corresponding inode_record */
3502         list_for_each_entry_safe(orphan, tmp,
3503                                  &root->orphan_data_extents, list) {
3504                 struct inode_record *inode;
3505
3506                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3507                                       1);
3508                 BUG_ON(IS_ERR(inode));
3509                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3510                 list_move(&orphan->list, &inode->orphan_extents);
3511         }
3512
3513         level = btrfs_header_level(root->node);
3514         memset(wc->nodes, 0, sizeof(wc->nodes));
3515         wc->nodes[level] = &root_node;
3516         wc->active_node = level;
3517         wc->root_level = level;
3518
3519         /* We may not have checked the root block, lets do that now */
3520         if (btrfs_is_leaf(root->node))
3521                 status = btrfs_check_leaf(root, NULL, root->node);
3522         else
3523                 status = btrfs_check_node(root, NULL, root->node);
3524         if (status != BTRFS_TREE_BLOCK_CLEAN)
3525                 return -EIO;
3526
3527         if (btrfs_root_refs(root_item) > 0 ||
3528             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3529                 path.nodes[level] = root->node;
3530                 extent_buffer_get(root->node);
3531                 path.slots[level] = 0;
3532         } else {
3533                 struct btrfs_key key;
3534                 struct btrfs_disk_key found_key;
3535
3536                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3537                 level = root_item->drop_level;
3538                 path.lowest_level = level;
3539                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3540                 if (wret < 0)
3541                         goto skip_walking;
3542                 btrfs_node_key(path.nodes[level], &found_key,
3543                                 path.slots[level]);
3544                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3545                                         sizeof(found_key)));
3546         }
3547
3548         while (1) {
3549                 wret = walk_down_tree(root, &path, wc, &level);
3550                 if (wret < 0)
3551                         ret = wret;
3552                 if (wret != 0)
3553                         break;
3554
3555                 wret = walk_up_tree(root, &path, wc, &level);
3556                 if (wret < 0)
3557                         ret = wret;
3558                 if (wret != 0)
3559                         break;
3560         }
3561 skip_walking:
3562         btrfs_release_path(&path);
3563
3564         if (!cache_tree_empty(&corrupt_blocks)) {
3565                 struct cache_extent *cache;
3566                 struct btrfs_corrupt_block *corrupt;
3567
3568                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3569                        root->root_key.objectid);
3570                 cache = first_cache_extent(&corrupt_blocks);
3571                 while (cache) {
3572                         corrupt = container_of(cache,
3573                                                struct btrfs_corrupt_block,
3574                                                cache);
3575                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3576                                cache->start, corrupt->level,
3577                                corrupt->key.objectid, corrupt->key.type,
3578                                corrupt->key.offset);
3579                         cache = next_cache_extent(cache);
3580                 }
3581                 if (repair) {
3582                         printf("Try to repair the btree for root %llu\n",
3583                                root->root_key.objectid);
3584                         ret = repair_btree(root, &corrupt_blocks);
3585                         if (ret < 0)
3586                                 fprintf(stderr, "Failed to repair btree: %s\n",
3587                                         strerror(-ret));
3588                         if (!ret)
3589                                 printf("Btree for root %llu is fixed\n",
3590                                        root->root_key.objectid);
3591                 }
3592         }
3593
3594         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3595         if (err < 0)
3596                 ret = err;
3597
3598         if (root_node.current) {
3599                 root_node.current->checked = 1;
3600                 maybe_free_inode_rec(&root_node.inode_cache,
3601                                 root_node.current);
3602         }
3603
3604         err = check_inode_recs(root, &root_node.inode_cache);
3605         if (!ret)
3606                 ret = err;
3607
3608         free_corrupt_blocks_tree(&corrupt_blocks);
3609         root->fs_info->corrupt_blocks = NULL;
3610         free_orphan_data_extents(&root->orphan_data_extents);
3611         return ret;
3612 }
3613
3614 static int fs_root_objectid(u64 objectid)
3615 {
3616         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3617             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3618                 return 1;
3619         return is_fstree(objectid);
3620 }
3621
3622 static int check_fs_roots(struct btrfs_root *root,
3623                           struct cache_tree *root_cache)
3624 {
3625         struct btrfs_path path;
3626         struct btrfs_key key;
3627         struct walk_control wc;
3628         struct extent_buffer *leaf, *tree_node;
3629         struct btrfs_root *tmp_root;
3630         struct btrfs_root *tree_root = root->fs_info->tree_root;
3631         int ret;
3632         int err = 0;
3633
3634         if (ctx.progress_enabled) {
3635                 ctx.tp = TASK_FS_ROOTS;
3636                 task_start(ctx.info);
3637         }
3638
3639         /*
3640          * Just in case we made any changes to the extent tree that weren't
3641          * reflected into the free space cache yet.
3642          */
3643         if (repair)
3644                 reset_cached_block_groups(root->fs_info);
3645         memset(&wc, 0, sizeof(wc));
3646         cache_tree_init(&wc.shared);
3647         btrfs_init_path(&path);
3648
3649 again:
3650         key.offset = 0;
3651         key.objectid = 0;
3652         key.type = BTRFS_ROOT_ITEM_KEY;
3653         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3654         if (ret < 0) {
3655                 err = 1;
3656                 goto out;
3657         }
3658         tree_node = tree_root->node;
3659         while (1) {
3660                 if (tree_node != tree_root->node) {
3661                         free_root_recs_tree(root_cache);
3662                         btrfs_release_path(&path);
3663                         goto again;
3664                 }
3665                 leaf = path.nodes[0];
3666                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3667                         ret = btrfs_next_leaf(tree_root, &path);
3668                         if (ret) {
3669                                 if (ret < 0)
3670                                         err = 1;
3671                                 break;
3672                         }
3673                         leaf = path.nodes[0];
3674                 }
3675                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3676                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3677                     fs_root_objectid(key.objectid)) {
3678                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3679                                 tmp_root = btrfs_read_fs_root_no_cache(
3680                                                 root->fs_info, &key);
3681                         } else {
3682                                 key.offset = (u64)-1;
3683                                 tmp_root = btrfs_read_fs_root(
3684                                                 root->fs_info, &key);
3685                         }
3686                         if (IS_ERR(tmp_root)) {
3687                                 err = 1;
3688                                 goto next;
3689                         }
3690                         ret = check_fs_root(tmp_root, root_cache, &wc);
3691                         if (ret == -EAGAIN) {
3692                                 free_root_recs_tree(root_cache);
3693                                 btrfs_release_path(&path);
3694                                 goto again;
3695                         }
3696                         if (ret)
3697                                 err = 1;
3698                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3699                                 btrfs_free_fs_root(tmp_root);
3700                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3701                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3702                         process_root_ref(leaf, path.slots[0], &key,
3703                                          root_cache);
3704                 }
3705 next:
3706                 path.slots[0]++;
3707         }
3708 out:
3709         btrfs_release_path(&path);
3710         if (err)
3711                 free_extent_cache_tree(&wc.shared);
3712         if (!cache_tree_empty(&wc.shared))
3713                 fprintf(stderr, "warning line %d\n", __LINE__);
3714
3715         task_stop(ctx.info);
3716
3717         return err;
3718 }
3719
3720 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3721 {
3722         struct list_head *cur = rec->backrefs.next;
3723         struct extent_backref *back;
3724         struct tree_backref *tback;
3725         struct data_backref *dback;
3726         u64 found = 0;
3727         int err = 0;
3728
3729         while(cur != &rec->backrefs) {
3730                 back = list_entry(cur, struct extent_backref, list);
3731                 cur = cur->next;
3732                 if (!back->found_extent_tree) {
3733                         err = 1;
3734                         if (!print_errs)
3735                                 goto out;
3736                         if (back->is_data) {
3737                                 dback = (struct data_backref *)back;
3738                                 fprintf(stderr, "Backref %llu %s %llu"
3739                                         " owner %llu offset %llu num_refs %lu"
3740                                         " not found in extent tree\n",
3741                                         (unsigned long long)rec->start,
3742                                         back->full_backref ?
3743                                         "parent" : "root",
3744                                         back->full_backref ?
3745                                         (unsigned long long)dback->parent:
3746                                         (unsigned long long)dback->root,
3747                                         (unsigned long long)dback->owner,
3748                                         (unsigned long long)dback->offset,
3749                                         (unsigned long)dback->num_refs);
3750                         } else {
3751                                 tback = (struct tree_backref *)back;
3752                                 fprintf(stderr, "Backref %llu parent %llu"
3753                                         " root %llu not found in extent tree\n",
3754                                         (unsigned long long)rec->start,
3755                                         (unsigned long long)tback->parent,
3756                                         (unsigned long long)tback->root);
3757                         }
3758                 }
3759                 if (!back->is_data && !back->found_ref) {
3760                         err = 1;
3761                         if (!print_errs)
3762                                 goto out;
3763                         tback = (struct tree_backref *)back;
3764                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3765                                 (unsigned long long)rec->start,
3766                                 back->full_backref ? "parent" : "root",
3767                                 back->full_backref ?
3768                                 (unsigned long long)tback->parent :
3769                                 (unsigned long long)tback->root, back);
3770                 }
3771                 if (back->is_data) {
3772                         dback = (struct data_backref *)back;
3773                         if (dback->found_ref != dback->num_refs) {
3774                                 err = 1;
3775                                 if (!print_errs)
3776                                         goto out;
3777                                 fprintf(stderr, "Incorrect local backref count"
3778                                         " on %llu %s %llu owner %llu"
3779                                         " offset %llu found %u wanted %u back %p\n",
3780                                         (unsigned long long)rec->start,
3781                                         back->full_backref ?
3782                                         "parent" : "root",
3783                                         back->full_backref ?
3784                                         (unsigned long long)dback->parent:
3785                                         (unsigned long long)dback->root,
3786                                         (unsigned long long)dback->owner,
3787                                         (unsigned long long)dback->offset,
3788                                         dback->found_ref, dback->num_refs, back);
3789                         }
3790                         if (dback->disk_bytenr != rec->start) {
3791                                 err = 1;
3792                                 if (!print_errs)
3793                                         goto out;
3794                                 fprintf(stderr, "Backref disk bytenr does not"
3795                                         " match extent record, bytenr=%llu, "
3796                                         "ref bytenr=%llu\n",
3797                                         (unsigned long long)rec->start,
3798                                         (unsigned long long)dback->disk_bytenr);
3799                         }
3800
3801                         if (dback->bytes != rec->nr) {
3802                                 err = 1;
3803                                 if (!print_errs)
3804                                         goto out;
3805                                 fprintf(stderr, "Backref bytes do not match "
3806                                         "extent backref, bytenr=%llu, ref "
3807                                         "bytes=%llu, backref bytes=%llu\n",
3808                                         (unsigned long long)rec->start,
3809                                         (unsigned long long)rec->nr,
3810                                         (unsigned long long)dback->bytes);
3811                         }
3812                 }
3813                 if (!back->is_data) {
3814                         found += 1;
3815                 } else {
3816                         dback = (struct data_backref *)back;
3817                         found += dback->found_ref;
3818                 }
3819         }
3820         if (found != rec->refs) {
3821                 err = 1;
3822                 if (!print_errs)
3823                         goto out;
3824                 fprintf(stderr, "Incorrect global backref count "
3825                         "on %llu found %llu wanted %llu\n",
3826                         (unsigned long long)rec->start,
3827                         (unsigned long long)found,
3828                         (unsigned long long)rec->refs);
3829         }
3830 out:
3831         return err;
3832 }
3833
3834 static int free_all_extent_backrefs(struct extent_record *rec)
3835 {
3836         struct extent_backref *back;
3837         struct list_head *cur;
3838         while (!list_empty(&rec->backrefs)) {
3839                 cur = rec->backrefs.next;
3840                 back = list_entry(cur, struct extent_backref, list);
3841                 list_del(cur);
3842                 free(back);
3843         }
3844         return 0;
3845 }
3846
3847 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3848                                      struct cache_tree *extent_cache)
3849 {
3850         struct cache_extent *cache;
3851         struct extent_record *rec;
3852
3853         while (1) {
3854                 cache = first_cache_extent(extent_cache);
3855                 if (!cache)
3856                         break;
3857                 rec = container_of(cache, struct extent_record, cache);
3858                 remove_cache_extent(extent_cache, cache);
3859                 free_all_extent_backrefs(rec);
3860                 free(rec);
3861         }
3862 }
3863
3864 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3865                                  struct extent_record *rec)
3866 {
3867         if (rec->content_checked && rec->owner_ref_checked &&
3868             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3869             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3870             !rec->bad_full_backref && !rec->crossing_stripes &&
3871             !rec->wrong_chunk_type) {
3872                 remove_cache_extent(extent_cache, &rec->cache);
3873                 free_all_extent_backrefs(rec);
3874                 list_del_init(&rec->list);
3875                 free(rec);
3876         }
3877         return 0;
3878 }
3879
3880 static int check_owner_ref(struct btrfs_root *root,
3881                             struct extent_record *rec,
3882                             struct extent_buffer *buf)
3883 {
3884         struct extent_backref *node;
3885         struct tree_backref *back;
3886         struct btrfs_root *ref_root;
3887         struct btrfs_key key;
3888         struct btrfs_path path;
3889         struct extent_buffer *parent;
3890         int level;
3891         int found = 0;
3892         int ret;
3893
3894         list_for_each_entry(node, &rec->backrefs, list) {
3895                 if (node->is_data)
3896                         continue;
3897                 if (!node->found_ref)
3898                         continue;
3899                 if (node->full_backref)
3900                         continue;
3901                 back = (struct tree_backref *)node;
3902                 if (btrfs_header_owner(buf) == back->root)
3903                         return 0;
3904         }
3905         BUG_ON(rec->is_root);
3906
3907         /* try to find the block by search corresponding fs tree */
3908         key.objectid = btrfs_header_owner(buf);
3909         key.type = BTRFS_ROOT_ITEM_KEY;
3910         key.offset = (u64)-1;
3911
3912         ref_root = btrfs_read_fs_root(root->fs_info, &key);
3913         if (IS_ERR(ref_root))
3914                 return 1;
3915
3916         level = btrfs_header_level(buf);
3917         if (level == 0)
3918                 btrfs_item_key_to_cpu(buf, &key, 0);
3919         else
3920                 btrfs_node_key_to_cpu(buf, &key, 0);
3921
3922         btrfs_init_path(&path);
3923         path.lowest_level = level + 1;
3924         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3925         if (ret < 0)
3926                 return 0;
3927
3928         parent = path.nodes[level + 1];
3929         if (parent && buf->start == btrfs_node_blockptr(parent,
3930                                                         path.slots[level + 1]))
3931                 found = 1;
3932
3933         btrfs_release_path(&path);
3934         return found ? 0 : 1;
3935 }
3936
3937 static int is_extent_tree_record(struct extent_record *rec)
3938 {
3939         struct list_head *cur = rec->backrefs.next;
3940         struct extent_backref *node;
3941         struct tree_backref *back;
3942         int is_extent = 0;
3943
3944         while(cur != &rec->backrefs) {
3945                 node = list_entry(cur, struct extent_backref, list);
3946                 cur = cur->next;
3947                 if (node->is_data)
3948                         return 0;
3949                 back = (struct tree_backref *)node;
3950                 if (node->full_backref)
3951                         return 0;
3952                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3953                         is_extent = 1;
3954         }
3955         return is_extent;
3956 }
3957
3958
3959 static int record_bad_block_io(struct btrfs_fs_info *info,
3960                                struct cache_tree *extent_cache,
3961                                u64 start, u64 len)
3962 {
3963         struct extent_record *rec;
3964         struct cache_extent *cache;
3965         struct btrfs_key key;
3966
3967         cache = lookup_cache_extent(extent_cache, start, len);
3968         if (!cache)
3969                 return 0;
3970
3971         rec = container_of(cache, struct extent_record, cache);
3972         if (!is_extent_tree_record(rec))
3973                 return 0;
3974
3975         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3976         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
3977 }
3978
/*
 * Exchange the entries stored at @slot and @slot + 1 of tree block @buf.
 *
 * Non-leaf block (header level != 0): the two btrfs_key_ptr entries are
 * read out and written back into each other's slot.  When @slot is 0,
 * btrfs_fixup_low_keys() is called with the new first key and the level
 * above @buf.
 *
 * Leaf: the keys, data offsets and data sizes of both items are read, both
 * payloads are copied to temporary heap buffers and written back at each
 * other's offset, the offset/size fields of the two item headers are
 * exchanged, and finally the keys are exchanged through
 * btrfs_set_item_key_unsafe() (which is why path->slots[0] is rewritten).
 *
 * Returns 0 on success or -ENOMEM if a temporary buffer cannot be
 * allocated.
 *
 * NOTE(review): in the leaf branch each payload is written back using the
 * *other* item's size (item1_data with item2_size and vice versa) while the
 * buffers were allocated with their own size.  If the two sizes differ this
 * looks like it reads past the smaller buffer - confirm and fix separately.
 */
3979 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3980                        struct extent_buffer *buf, int slot)
3981 {
3982         if (btrfs_header_level(buf)) {
3983                 struct btrfs_key_ptr ptr1, ptr2;
3984
3985                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3986                                    sizeof(struct btrfs_key_ptr));
3987                 read_extent_buffer(buf, &ptr2,
3988                                    btrfs_node_key_ptr_offset(slot + 1),
3989                                    sizeof(struct btrfs_key_ptr));
3990                 write_extent_buffer(buf, &ptr1,
3991                                     btrfs_node_key_ptr_offset(slot + 1),
3992                                     sizeof(struct btrfs_key_ptr));
3993                 write_extent_buffer(buf, &ptr2,
3994                                     btrfs_node_key_ptr_offset(slot),
3995                                     sizeof(struct btrfs_key_ptr));
                     /* the first key of the node changed: fix up the levels above */
3996                 if (slot == 0) {
3997                         struct btrfs_disk_key key;
3998                         btrfs_node_key(buf, &key, 0);
3999                         btrfs_fixup_low_keys(root, path, &key,
4000                                              btrfs_header_level(buf) + 1);
4001                 }
4002         } else {
4003                 struct btrfs_item *item1, *item2;
4004                 struct btrfs_key k1, k2;
4005                 char *item1_data, *item2_data;
4006                 u32 item1_offset, item2_offset, item1_size, item2_size;
4007
4008                 item1 = btrfs_item_nr(slot);
4009                 item2 = btrfs_item_nr(slot + 1);
4010                 btrfs_item_key_to_cpu(buf, &k1, slot);
4011                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4012                 item1_offset = btrfs_item_offset(buf, item1);
4013                 item2_offset = btrfs_item_offset(buf, item2);
4014                 item1_size = btrfs_item_size(buf, item1);
4015                 item2_size = btrfs_item_size(buf, item2);
4016
4017                 item1_data = malloc(item1_size);
4018                 if (!item1_data)
4019                         return -ENOMEM;
4020                 item2_data = malloc(item2_size);
4021                 if (!item2_data) {
4022                         free(item1_data);
4023                         return -ENOMEM;
4024                 }
4025
                     /* snapshot both payloads before anything is overwritten */
4026                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4027                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4028
4029                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4030                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4031                 free(item1_data);
4032                 free(item2_data);
4033
                     /* exchange the offset/size recorded in the two item headers */
4034                 btrfs_set_item_offset(buf, item1, item2_offset);
4035                 btrfs_set_item_offset(buf, item2, item1_offset);
4036                 btrfs_set_item_size(buf, item1, item2_size);
4037                 btrfs_set_item_size(buf, item2, item1_size);
4038
                     /* finally exchange the keys; the helper acts on path->slots[0] */
4039                 path->slots[0] = slot;
4040                 btrfs_set_item_key_unsafe(root, path, &k2);
4041                 path->slots[0] = slot + 1;
4042                 btrfs_set_item_key_unsafe(root, path, &k1);
4043         }
4044         return 0;
4045 }
4046
4047 static int fix_key_order(struct btrfs_trans_handle *trans,
4048                          struct btrfs_root *root,
4049                          struct btrfs_path *path)
4050 {
4051         struct extent_buffer *buf;
4052         struct btrfs_key k1, k2;
4053         int i;
4054         int level = path->lowest_level;
4055         int ret = -EIO;
4056
4057         buf = path->nodes[level];
4058         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4059                 if (level) {
4060                         btrfs_node_key_to_cpu(buf, &k1, i);
4061                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4062                 } else {
4063                         btrfs_item_key_to_cpu(buf, &k1, i);
4064                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4065                 }
4066                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4067                         continue;
4068                 ret = swap_values(root, path, buf, i);
4069                 if (ret)
4070                         break;
4071                 btrfs_mark_buffer_dirty(buf);
4072                 i = 0;
4073         }
4074         return ret;
4075 }
4076
4077 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4078                              struct btrfs_root *root,
4079                              struct btrfs_path *path,
4080                              struct extent_buffer *buf, int slot)
4081 {
4082         struct btrfs_key key;
4083         int nritems = btrfs_header_nritems(buf);
4084
4085         btrfs_item_key_to_cpu(buf, &key, slot);
4086
4087         /* These are all the keys we can deal with missing. */
4088         if (key.type != BTRFS_DIR_INDEX_KEY &&
4089             key.type != BTRFS_EXTENT_ITEM_KEY &&
4090             key.type != BTRFS_METADATA_ITEM_KEY &&
4091             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4092             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4093                 return -1;
4094
4095         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4096                (unsigned long long)key.objectid, key.type,
4097                (unsigned long long)key.offset, slot, buf->start);
4098         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4099                               btrfs_item_nr_offset(slot + 1),
4100                               sizeof(struct btrfs_item) *
4101                               (nritems - slot - 1));
4102         btrfs_set_header_nritems(buf, nritems - 1);
4103         if (slot == 0) {
4104                 struct btrfs_disk_key disk_key;
4105
4106                 btrfs_item_key(buf, &disk_key, 0);
4107                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4108         }
4109         btrfs_mark_buffer_dirty(buf);
4110         return 0;
4111 }
4112
4113 static int fix_item_offset(struct btrfs_trans_handle *trans,
4114                            struct btrfs_root *root,
4115                            struct btrfs_path *path)
4116 {
4117         struct extent_buffer *buf;
4118         int i;
4119         int ret = 0;
4120
4121         /* We should only get this for leaves */
4122         BUG_ON(path->lowest_level);
4123         buf = path->nodes[0];
4124 again:
4125         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4126                 unsigned int shift = 0, offset;
4127
4128                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4129                     BTRFS_LEAF_DATA_SIZE(root)) {
4130                         if (btrfs_item_end_nr(buf, i) >
4131                             BTRFS_LEAF_DATA_SIZE(root)) {
4132                                 ret = delete_bogus_item(trans, root, path,
4133                                                         buf, i);
4134                                 if (!ret)
4135                                         goto again;
4136                                 fprintf(stderr, "item is off the end of the "
4137                                         "leaf, can't fix\n");
4138                                 ret = -EIO;
4139                                 break;
4140                         }
4141                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4142                                 btrfs_item_end_nr(buf, i);
4143                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4144                            btrfs_item_offset_nr(buf, i - 1)) {
4145                         if (btrfs_item_end_nr(buf, i) >
4146                             btrfs_item_offset_nr(buf, i - 1)) {
4147                                 ret = delete_bogus_item(trans, root, path,
4148                                                         buf, i);
4149                                 if (!ret)
4150                                         goto again;
4151                                 fprintf(stderr, "items overlap, can't fix\n");
4152                                 ret = -EIO;
4153                                 break;
4154                         }
4155                         shift = btrfs_item_offset_nr(buf, i - 1) -
4156                                 btrfs_item_end_nr(buf, i);
4157                 }
4158                 if (!shift)
4159                         continue;
4160
4161                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4162                        i, shift, (unsigned long long)buf->start);
4163                 offset = btrfs_item_offset_nr(buf, i);
4164                 memmove_extent_buffer(buf,
4165                                       btrfs_leaf_data(buf) + offset + shift,
4166                                       btrfs_leaf_data(buf) + offset,
4167                                       btrfs_item_size_nr(buf, i));
4168                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4169                                       offset + shift);
4170                 btrfs_mark_buffer_dirty(buf);
4171         }
4172
4173         /*
4174          * We may have moved things, in which case we want to exit so we don't
4175          * write those changes out.  Once we have proper abort functionality in
4176          * progs this can be changed to something nicer.
4177          */
4178         BUG_ON(ret);
4179         return ret;
4180 }
4181
4182 /*
4183  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4184  * then just return -EIO.
4185  */
4186 static int try_to_fix_bad_block(struct btrfs_root *root,
4187                                 struct extent_buffer *buf,
4188                                 enum btrfs_tree_block_status status)
4189 {
4190         struct btrfs_trans_handle *trans;
4191         struct ulist *roots;
4192         struct ulist_node *node;
4193         struct btrfs_root *search_root;
4194         struct btrfs_path *path;
4195         struct ulist_iterator iter;
4196         struct btrfs_key root_key, key;
4197         int ret;
4198
4199         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4200             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4201                 return -EIO;
4202
4203         path = btrfs_alloc_path();
4204         if (!path)
4205                 return -EIO;
4206
4207         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4208                                    0, &roots);
4209         if (ret) {
4210                 btrfs_free_path(path);
4211                 return -EIO;
4212         }
4213
4214         ULIST_ITER_INIT(&iter);
4215         while ((node = ulist_next(roots, &iter))) {
4216                 root_key.objectid = node->val;
4217                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4218                 root_key.offset = (u64)-1;
4219
4220                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4221                 if (IS_ERR(root)) {
4222                         ret = -EIO;
4223                         break;
4224                 }
4225
4226
4227                 trans = btrfs_start_transaction(search_root, 0);
4228                 if (IS_ERR(trans)) {
4229                         ret = PTR_ERR(trans);
4230                         break;
4231                 }
4232
4233                 path->lowest_level = btrfs_header_level(buf);
4234                 path->skip_check_block = 1;
4235                 if (path->lowest_level)
4236                         btrfs_node_key_to_cpu(buf, &key, 0);
4237                 else
4238                         btrfs_item_key_to_cpu(buf, &key, 0);
4239                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4240                 if (ret) {
4241                         ret = -EIO;
4242                         btrfs_commit_transaction(trans, search_root);
4243                         break;
4244                 }
4245                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4246                         ret = fix_key_order(trans, search_root, path);
4247                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4248                         ret = fix_item_offset(trans, search_root, path);
4249                 if (ret) {
4250                         btrfs_commit_transaction(trans, search_root);
4251                         break;
4252                 }
4253                 btrfs_release_path(path);
4254                 btrfs_commit_transaction(trans, search_root);
4255         }
4256         ulist_free(roots);
4257         btrfs_free_path(path);
4258         return ret;
4259 }
4260
4261 static int check_block(struct btrfs_root *root,
4262                        struct cache_tree *extent_cache,
4263                        struct extent_buffer *buf, u64 flags)
4264 {
4265         struct extent_record *rec;
4266         struct cache_extent *cache;
4267         struct btrfs_key key;
4268         enum btrfs_tree_block_status status;
4269         int ret = 0;
4270         int level;
4271
4272         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4273         if (!cache)
4274                 return 1;
4275         rec = container_of(cache, struct extent_record, cache);
4276         rec->generation = btrfs_header_generation(buf);
4277
4278         level = btrfs_header_level(buf);
4279         if (btrfs_header_nritems(buf) > 0) {
4280
4281                 if (level == 0)
4282                         btrfs_item_key_to_cpu(buf, &key, 0);
4283                 else
4284                         btrfs_node_key_to_cpu(buf, &key, 0);
4285
4286                 rec->info_objectid = key.objectid;
4287         }
4288         rec->info_level = level;
4289
4290         if (btrfs_is_leaf(buf))
4291                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4292         else
4293                 status = btrfs_check_node(root, &rec->parent_key, buf);
4294
4295         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4296                 if (repair)
4297                         status = try_to_fix_bad_block(root, buf, status);
4298                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4299                         ret = -EIO;
4300                         fprintf(stderr, "bad block %llu\n",
4301                                 (unsigned long long)buf->start);
4302                 } else {
4303                         /*
4304                          * Signal to callers we need to start the scan over
4305                          * again since we'll have cow'ed blocks.
4306                          */
4307                         ret = -EAGAIN;
4308                 }
4309         } else {
4310                 rec->content_checked = 1;
4311                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4312                         rec->owner_ref_checked = 1;
4313                 else {
4314                         ret = check_owner_ref(root, rec, buf);
4315                         if (!ret)
4316                                 rec->owner_ref_checked = 1;
4317                 }
4318         }
4319         if (!ret)
4320                 maybe_free_extent_rec(extent_cache, rec);
4321         return ret;
4322 }
4323
4324 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4325                                                 u64 parent, u64 root)
4326 {
4327         struct list_head *cur = rec->backrefs.next;
4328         struct extent_backref *node;
4329         struct tree_backref *back;
4330
4331         while(cur != &rec->backrefs) {
4332                 node = list_entry(cur, struct extent_backref, list);
4333                 cur = cur->next;
4334                 if (node->is_data)
4335                         continue;
4336                 back = (struct tree_backref *)node;
4337                 if (parent > 0) {
4338                         if (!node->full_backref)
4339                                 continue;
4340                         if (parent == back->parent)
4341                                 return back;
4342                 } else {
4343                         if (node->full_backref)
4344                                 continue;
4345                         if (back->root == root)
4346                                 return back;
4347                 }
4348         }
4349         return NULL;
4350 }
4351
4352 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4353                                                 u64 parent, u64 root)
4354 {
4355         struct tree_backref *ref = malloc(sizeof(*ref));
4356         memset(&ref->node, 0, sizeof(ref->node));
4357         if (parent > 0) {
4358                 ref->parent = parent;
4359                 ref->node.full_backref = 1;
4360         } else {
4361                 ref->root = root;
4362                 ref->node.full_backref = 0;
4363         }
4364         list_add_tail(&ref->node.list, &rec->backrefs);
4365
4366         return ref;
4367 }
4368
4369 static struct data_backref *find_data_backref(struct extent_record *rec,
4370                                                 u64 parent, u64 root,
4371                                                 u64 owner, u64 offset,
4372                                                 int found_ref,
4373                                                 u64 disk_bytenr, u64 bytes)
4374 {
4375         struct list_head *cur = rec->backrefs.next;
4376         struct extent_backref *node;
4377         struct data_backref *back;
4378
4379         while(cur != &rec->backrefs) {
4380                 node = list_entry(cur, struct extent_backref, list);
4381                 cur = cur->next;
4382                 if (!node->is_data)
4383                         continue;
4384                 back = (struct data_backref *)node;
4385                 if (parent > 0) {
4386                         if (!node->full_backref)
4387                                 continue;
4388                         if (parent == back->parent)
4389                                 return back;
4390                 } else {
4391                         if (node->full_backref)
4392                                 continue;
4393                         if (back->root == root && back->owner == owner &&
4394                             back->offset == offset) {
4395                                 if (found_ref && node->found_ref &&
4396                                     (back->bytes != bytes ||
4397                                     back->disk_bytenr != disk_bytenr))
4398                                         continue;
4399                                 return back;
4400                         }
4401                 }
4402         }
4403         return NULL;
4404 }
4405
4406 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4407                                                 u64 parent, u64 root,
4408                                                 u64 owner, u64 offset,
4409                                                 u64 max_size)
4410 {
4411         struct data_backref *ref = malloc(sizeof(*ref));
4412         memset(&ref->node, 0, sizeof(ref->node));
4413         ref->node.is_data = 1;
4414
4415         if (parent > 0) {
4416                 ref->parent = parent;
4417                 ref->owner = 0;
4418                 ref->offset = 0;
4419                 ref->node.full_backref = 1;
4420         } else {
4421                 ref->root = root;
4422                 ref->owner = owner;
4423                 ref->offset = offset;
4424                 ref->node.full_backref = 0;
4425         }
4426         ref->bytes = max_size;
4427         ref->found_ref = 0;
4428         ref->num_refs = 0;
4429         list_add_tail(&ref->node.list, &rec->backrefs);
4430         if (max_size > rec->max_size)
4431                 rec->max_size = max_size;
4432         return ref;
4433 }
4434
4435 /* Check if the type of extent matches with its chunk */
4436 static void check_extent_type(struct extent_record *rec)
4437 {
4438         struct btrfs_block_group_cache *bg_cache;
4439
4440         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4441         if (!bg_cache)
4442                 return;
4443
4444         /* data extent, check chunk directly*/
4445         if (!rec->metadata) {
4446                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4447                         rec->wrong_chunk_type = 1;
4448                 return;
4449         }
4450
4451         /* metadata extent, check the obvious case first */
4452         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4453                                  BTRFS_BLOCK_GROUP_METADATA))) {
4454                 rec->wrong_chunk_type = 1;
4455                 return;
4456         }
4457
4458         /*
4459          * Check SYSTEM extent, as it's also marked as metadata, we can only
4460          * make sure it's a SYSTEM extent by its backref
4461          */
4462         if (!list_empty(&rec->backrefs)) {
4463                 struct extent_backref *node;
4464                 struct tree_backref *tback;
4465                 u64 bg_type;
4466
4467                 node = list_entry(rec->backrefs.next, struct extent_backref,
4468                                   list);
4469                 if (node->is_data) {
4470                         /* tree block shouldn't have data backref */
4471                         rec->wrong_chunk_type = 1;
4472                         return;
4473                 }
4474                 tback = container_of(node, struct tree_backref, node);
4475
4476                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4477                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4478                 else
4479                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4480                 if (!(bg_cache->flags & bg_type))
4481                         rec->wrong_chunk_type = 1;
4482         }
4483 }
4484
4485 static int add_extent_rec(struct cache_tree *extent_cache,
4486                           struct btrfs_key *parent_key, u64 parent_gen,
4487                           u64 start, u64 nr, u64 extent_item_refs,
4488                           int is_root, int inc_ref, int set_checked,
4489                           int metadata, int extent_rec, u64 max_size)
4490 {
4491         struct extent_record *rec;
4492         struct cache_extent *cache;
4493         int ret = 0;
4494         int dup = 0;
4495
4496         cache = lookup_cache_extent(extent_cache, start, nr);
4497         if (cache) {
4498                 rec = container_of(cache, struct extent_record, cache);
4499                 if (inc_ref)
4500                         rec->refs++;
4501                 if (rec->nr == 1)
4502                         rec->nr = max(nr, max_size);
4503
4504                 /*
4505                  * We need to make sure to reset nr to whatever the extent
4506                  * record says was the real size, this way we can compare it to
4507                  * the backrefs.
4508                  */
4509                 if (extent_rec) {
4510                         if (start != rec->start || rec->found_rec) {
4511                                 struct extent_record *tmp;
4512
4513                                 dup = 1;
4514                                 if (list_empty(&rec->list))
4515                                         list_add_tail(&rec->list,
4516                                                       &duplicate_extents);
4517
4518                                 /*
4519                                  * We have to do this song and dance in case we
4520                                  * find an extent record that falls inside of
4521                                  * our current extent record but does not have
4522                                  * the same objectid.
4523                                  */
4524                                 tmp = malloc(sizeof(*tmp));
4525                                 if (!tmp)
4526                                         return -ENOMEM;
4527                                 tmp->start = start;
4528                                 tmp->max_size = max_size;
4529                                 tmp->nr = nr;
4530                                 tmp->found_rec = 1;
4531                                 tmp->metadata = metadata;
4532                                 tmp->extent_item_refs = extent_item_refs;
4533                                 INIT_LIST_HEAD(&tmp->list);
4534                                 list_add_tail(&tmp->list, &rec->dups);
4535                                 rec->num_duplicates++;
4536                         } else {
4537                                 rec->nr = nr;
4538                                 rec->found_rec = 1;
4539                         }
4540                 }
4541
4542                 if (extent_item_refs && !dup) {
4543                         if (rec->extent_item_refs) {
4544                                 fprintf(stderr, "block %llu rec "
4545                                         "extent_item_refs %llu, passed %llu\n",
4546                                         (unsigned long long)start,
4547                                         (unsigned long long)
4548                                                         rec->extent_item_refs,
4549                                         (unsigned long long)extent_item_refs);
4550                         }
4551                         rec->extent_item_refs = extent_item_refs;
4552                 }
4553                 if (is_root)
4554                         rec->is_root = 1;
4555                 if (set_checked) {
4556                         rec->content_checked = 1;
4557                         rec->owner_ref_checked = 1;
4558                 }
4559
4560                 if (parent_key)
4561                         btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4562                 if (parent_gen)
4563                         rec->parent_generation = parent_gen;
4564
4565                 if (rec->max_size < max_size)
4566                         rec->max_size = max_size;
4567
4568                 /*
4569                  * A metadata extent can't cross stripe_len boundary, otherwise
4570                  * kernel scrub won't be able to handle it.
4571                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4572                  * it.
4573                  */
4574                 if (metadata && check_crossing_stripes(rec->start,
4575                                                        rec->max_size))
4576                                 rec->crossing_stripes = 1;
4577                 check_extent_type(rec);
4578                 maybe_free_extent_rec(extent_cache, rec);
4579                 return ret;
4580         }
4581         rec = malloc(sizeof(*rec));
4582         rec->start = start;
4583         rec->max_size = max_size;
4584         rec->nr = max(nr, max_size);
4585         rec->found_rec = !!extent_rec;
4586         rec->content_checked = 0;
4587         rec->owner_ref_checked = 0;
4588         rec->num_duplicates = 0;
4589         rec->metadata = metadata;
4590         rec->flag_block_full_backref = -1;
4591         rec->bad_full_backref = 0;
4592         rec->crossing_stripes = 0;
4593         rec->wrong_chunk_type = 0;
4594         INIT_LIST_HEAD(&rec->backrefs);
4595         INIT_LIST_HEAD(&rec->dups);
4596         INIT_LIST_HEAD(&rec->list);
4597
4598         if (is_root)
4599                 rec->is_root = 1;
4600         else
4601                 rec->is_root = 0;
4602
4603         if (inc_ref)
4604                 rec->refs = 1;
4605         else
4606                 rec->refs = 0;
4607
4608         if (extent_item_refs)
4609                 rec->extent_item_refs = extent_item_refs;
4610         else
4611                 rec->extent_item_refs = 0;
4612
4613         if (parent_key)
4614                 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4615         else
4616                 memset(&rec->parent_key, 0, sizeof(*parent_key));
4617
4618         if (parent_gen)
4619                 rec->parent_generation = parent_gen;
4620         else
4621                 rec->parent_generation = 0;
4622
4623         rec->cache.start = start;
4624         rec->cache.size = nr;
4625         ret = insert_cache_extent(extent_cache, &rec->cache);
4626         BUG_ON(ret);
4627         bytes_used += nr;
4628         if (set_checked) {
4629                 rec->content_checked = 1;
4630                 rec->owner_ref_checked = 1;
4631         }
4632
4633         if (metadata)
4634                 if (check_crossing_stripes(rec->start, rec->max_size))
4635                         rec->crossing_stripes = 1;
4636         check_extent_type(rec);
4637         return ret;
4638 }
4639
4640 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4641                             u64 parent, u64 root, int found_ref)
4642 {
4643         struct extent_record *rec;
4644         struct tree_backref *back;
4645         struct cache_extent *cache;
4646
4647         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4648         if (!cache) {
4649                 add_extent_rec(extent_cache, NULL, 0, bytenr,
4650                                1, 0, 0, 0, 0, 1, 0, 0);
4651                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4652                 if (!cache)
4653                         abort();
4654         }
4655
4656         rec = container_of(cache, struct extent_record, cache);
4657         if (rec->start != bytenr) {
4658                 abort();
4659         }
4660
4661         back = find_tree_backref(rec, parent, root);
4662         if (!back)
4663                 back = alloc_tree_backref(rec, parent, root);
4664
4665         if (found_ref) {
4666                 if (back->node.found_ref) {
4667                         fprintf(stderr, "Extent back ref already exists "
4668                                 "for %llu parent %llu root %llu \n",
4669                                 (unsigned long long)bytenr,
4670                                 (unsigned long long)parent,
4671                                 (unsigned long long)root);
4672                 }
4673                 back->node.found_ref = 1;
4674         } else {
4675                 if (back->node.found_extent_tree) {
4676                         fprintf(stderr, "Extent back ref already exists "
4677                                 "for %llu parent %llu root %llu \n",
4678                                 (unsigned long long)bytenr,
4679                                 (unsigned long long)parent,
4680                                 (unsigned long long)root);
4681                 }
4682                 back->node.found_extent_tree = 1;
4683         }
4684         check_extent_type(rec);
4685         maybe_free_extent_rec(extent_cache, rec);
4686         return 0;
4687 }
4688
4689 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4690                             u64 parent, u64 root, u64 owner, u64 offset,
4691                             u32 num_refs, int found_ref, u64 max_size)
4692 {
4693         struct extent_record *rec;
4694         struct data_backref *back;
4695         struct cache_extent *cache;
4696
4697         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4698         if (!cache) {
4699                 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
4700                                0, 0, max_size);
4701                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4702                 if (!cache)
4703                         abort();
4704         }
4705
4706         rec = container_of(cache, struct extent_record, cache);
4707         if (rec->max_size < max_size)
4708                 rec->max_size = max_size;
4709
4710         /*
4711          * If found_ref is set then max_size is the real size and must match the
4712          * existing refs.  So if we have already found a ref then we need to
4713          * make sure that this ref matches the existing one, otherwise we need
4714          * to add a new backref so we can notice that the backrefs don't match
4715          * and we need to figure out who is telling the truth.  This is to
4716          * account for that awful fsync bug I introduced where we'd end up with
4717          * a btrfs_file_extent_item that would have its length include multiple
4718          * prealloc extents or point inside of a prealloc extent.
4719          */
4720         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4721                                  bytenr, max_size);
4722         if (!back)
4723                 back = alloc_data_backref(rec, parent, root, owner, offset,
4724                                           max_size);
4725
4726         if (found_ref) {
4727                 BUG_ON(num_refs != 1);
4728                 if (back->node.found_ref)
4729                         BUG_ON(back->bytes != max_size);
4730                 back->node.found_ref = 1;
4731                 back->found_ref += 1;
4732                 back->bytes = max_size;
4733                 back->disk_bytenr = bytenr;
4734                 rec->refs += 1;
4735                 rec->content_checked = 1;
4736                 rec->owner_ref_checked = 1;
4737         } else {
4738                 if (back->node.found_extent_tree) {
4739                         fprintf(stderr, "Extent back ref already exists "
4740                                 "for %llu parent %llu root %llu "
4741                                 "owner %llu offset %llu num_refs %lu\n",
4742                                 (unsigned long long)bytenr,
4743                                 (unsigned long long)parent,
4744                                 (unsigned long long)root,
4745                                 (unsigned long long)owner,
4746                                 (unsigned long long)offset,
4747                                 (unsigned long)num_refs);
4748                 }
4749                 back->num_refs = num_refs;
4750                 back->node.found_extent_tree = 1;
4751         }
4752         maybe_free_extent_rec(extent_cache, rec);
4753         return 0;
4754 }
4755
4756 static int add_pending(struct cache_tree *pending,
4757                        struct cache_tree *seen, u64 bytenr, u32 size)
4758 {
4759         int ret;
4760         ret = add_cache_extent(seen, bytenr, size);
4761         if (ret)
4762                 return ret;
4763         add_cache_extent(pending, bytenr, size);
4764         return 0;
4765 }
4766
4767 static int pick_next_pending(struct cache_tree *pending,
4768                         struct cache_tree *reada,
4769                         struct cache_tree *nodes,
4770                         u64 last, struct block_info *bits, int bits_nr,
4771                         int *reada_bits)
4772 {
4773         unsigned long node_start = last;
4774         struct cache_extent *cache;
4775         int ret;
4776
4777         cache = search_cache_extent(reada, 0);
4778         if (cache) {
4779                 bits[0].start = cache->start;
4780                 bits[0].size = cache->size;
4781                 *reada_bits = 1;
4782                 return 1;
4783         }
4784         *reada_bits = 0;
4785         if (node_start > 32768)
4786                 node_start -= 32768;
4787
4788         cache = search_cache_extent(nodes, node_start);
4789         if (!cache)
4790                 cache = search_cache_extent(nodes, 0);
4791
4792         if (!cache) {
4793                  cache = search_cache_extent(pending, 0);
4794                  if (!cache)
4795                          return 0;
4796                  ret = 0;
4797                  do {
4798                          bits[ret].start = cache->start;
4799                          bits[ret].size = cache->size;
4800                          cache = next_cache_extent(cache);
4801                          ret++;
4802                  } while (cache && ret < bits_nr);
4803                  return ret;
4804         }
4805
4806         ret = 0;
4807         do {
4808                 bits[ret].start = cache->start;
4809                 bits[ret].size = cache->size;
4810                 cache = next_cache_extent(cache);
4811                 ret++;
4812         } while (cache && ret < bits_nr);
4813
4814         if (bits_nr - ret > 8) {
4815                 u64 lookup = bits[0].start + bits[0].size;
4816                 struct cache_extent *next;
4817                 next = search_cache_extent(pending, lookup);
4818                 while(next) {
4819                         if (next->start - lookup > 32768)
4820                                 break;
4821                         bits[ret].start = next->start;
4822                         bits[ret].size = next->size;
4823                         lookup = next->start + next->size;
4824                         ret++;
4825                         if (ret == bits_nr)
4826                                 break;
4827                         next = next_cache_extent(next);
4828                         if (!next)
4829                                 break;
4830                 }
4831         }
4832         return ret;
4833 }
4834
4835 static void free_chunk_record(struct cache_extent *cache)
4836 {
4837         struct chunk_record *rec;
4838
4839         rec = container_of(cache, struct chunk_record, cache);
4840         list_del_init(&rec->list);
4841         list_del_init(&rec->dextents);
4842         free(rec);
4843 }
4844
4845 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4846 {
4847         cache_tree_free_extents(chunk_cache, free_chunk_record);
4848 }
4849
4850 static void free_device_record(struct rb_node *node)
4851 {
4852         struct device_record *rec;
4853
4854         rec = container_of(node, struct device_record, node);
4855         free(rec);
4856 }
4857
4858 FREE_RB_BASED_TREE(device_cache, free_device_record);
4859
4860 int insert_block_group_record(struct block_group_tree *tree,
4861                               struct block_group_record *bg_rec)
4862 {
4863         int ret;
4864
4865         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4866         if (ret)
4867                 return ret;
4868
4869         list_add_tail(&bg_rec->list, &tree->block_groups);
4870         return 0;
4871 }
4872
4873 static void free_block_group_record(struct cache_extent *cache)
4874 {
4875         struct block_group_record *rec;
4876
4877         rec = container_of(cache, struct block_group_record, cache);
4878         list_del_init(&rec->list);
4879         free(rec);
4880 }
4881
4882 void free_block_group_tree(struct block_group_tree *tree)
4883 {
4884         cache_tree_free_extents(&tree->tree, free_block_group_record);
4885 }
4886
4887 int insert_device_extent_record(struct device_extent_tree *tree,
4888                                 struct device_extent_record *de_rec)
4889 {
4890         int ret;
4891
4892         /*
4893          * Device extent is a bit different from the other extents, because
4894          * the extents which belong to the different devices may have the
4895          * same start and size, so we need use the special extent cache
4896          * search/insert functions.
4897          */
4898         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4899         if (ret)
4900                 return ret;
4901
4902         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4903         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4904         return 0;
4905 }
4906
4907 static void free_device_extent_record(struct cache_extent *cache)
4908 {
4909         struct device_extent_record *rec;
4910
4911         rec = container_of(cache, struct device_extent_record, cache);
4912         if (!list_empty(&rec->chunk_list))
4913                 list_del_init(&rec->chunk_list);
4914         if (!list_empty(&rec->device_list))
4915                 list_del_init(&rec->device_list);
4916         free(rec);
4917 }
4918
4919 void free_device_extent_tree(struct device_extent_tree *tree)
4920 {
4921         cache_tree_free_extents(&tree->tree, free_device_extent_record);
4922 }
4923
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent reference item at @slot of @leaf into a backref on the
 * extent record for key.objectid, using key.offset as the parent.
 *
 * References whose objectid is below BTRFS_FIRST_FREE_OBJECTID are recorded
 * as tree backrefs, everything else as data backrefs carrying the item's
 * reference count.
 *
 * Returns the status of the backref helper that was called, so a failure
 * there is no longer silently dropped.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;
	int ret;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
		ret = add_tree_backref(extent_cache, key.objectid, key.offset,
				       0, 0);
	} else {
		ret = add_data_backref(extent_cache, key.objectid, key.offset,
				       0, 0, 0,
				       btrfs_ref_count_v0(leaf, ref0), 0, 0);
	}
	return ret;
}
#endif
4942
4943 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4944                                             struct btrfs_key *key,
4945                                             int slot)
4946 {
4947         struct btrfs_chunk *ptr;
4948         struct chunk_record *rec;
4949         int num_stripes, i;
4950
4951         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4952         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4953
4954         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4955         if (!rec) {
4956                 fprintf(stderr, "memory allocation failed\n");
4957                 exit(-1);
4958         }
4959
4960         INIT_LIST_HEAD(&rec->list);
4961         INIT_LIST_HEAD(&rec->dextents);
4962         rec->bg_rec = NULL;
4963
4964         rec->cache.start = key->offset;
4965         rec->cache.size = btrfs_chunk_length(leaf, ptr);
4966
4967         rec->generation = btrfs_header_generation(leaf);
4968
4969         rec->objectid = key->objectid;
4970         rec->type = key->type;
4971         rec->offset = key->offset;
4972
4973         rec->length = rec->cache.size;
4974         rec->owner = btrfs_chunk_owner(leaf, ptr);
4975         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4976         rec->type_flags = btrfs_chunk_type(leaf, ptr);
4977         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4978         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4979         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4980         rec->num_stripes = num_stripes;
4981         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4982
4983         for (i = 0; i < rec->num_stripes; ++i) {
4984                 rec->stripes[i].devid =
4985                         btrfs_stripe_devid_nr(leaf, ptr, i);
4986                 rec->stripes[i].offset =
4987                         btrfs_stripe_offset_nr(leaf, ptr, i);
4988                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4989                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
4990                                 BTRFS_UUID_SIZE);
4991         }
4992
4993         return rec;
4994 }
4995
4996 static int process_chunk_item(struct cache_tree *chunk_cache,
4997                               struct btrfs_key *key, struct extent_buffer *eb,
4998                               int slot)
4999 {
5000         struct chunk_record *rec;
5001         int ret = 0;
5002
5003         rec = btrfs_new_chunk_record(eb, key, slot);
5004         ret = insert_cache_extent(chunk_cache, &rec->cache);
5005         if (ret) {
5006                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5007                         rec->offset, rec->length);
5008                 free(rec);
5009         }
5010
5011         return ret;
5012 }
5013
5014 static int process_device_item(struct rb_root *dev_cache,
5015                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5016 {
5017         struct btrfs_dev_item *ptr;
5018         struct device_record *rec;
5019         int ret = 0;
5020
5021         ptr = btrfs_item_ptr(eb,
5022                 slot, struct btrfs_dev_item);
5023
5024         rec = malloc(sizeof(*rec));
5025         if (!rec) {
5026                 fprintf(stderr, "memory allocation failed\n");
5027                 return -ENOMEM;
5028         }
5029
5030         rec->devid = key->offset;
5031         rec->generation = btrfs_header_generation(eb);
5032
5033         rec->objectid = key->objectid;
5034         rec->type = key->type;
5035         rec->offset = key->offset;
5036
5037         rec->devid = btrfs_device_id(eb, ptr);
5038         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5039         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5040
5041         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5042         if (ret) {
5043                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5044                 free(rec);
5045         }
5046
5047         return ret;
5048 }
5049
5050 struct block_group_record *
5051 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5052                              int slot)
5053 {
5054         struct btrfs_block_group_item *ptr;
5055         struct block_group_record *rec;
5056
5057         rec = calloc(1, sizeof(*rec));
5058         if (!rec) {
5059                 fprintf(stderr, "memory allocation failed\n");
5060                 exit(-1);
5061         }
5062
5063         rec->cache.start = key->objectid;
5064         rec->cache.size = key->offset;
5065
5066         rec->generation = btrfs_header_generation(leaf);
5067
5068         rec->objectid = key->objectid;
5069         rec->type = key->type;
5070         rec->offset = key->offset;
5071
5072         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5073         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5074
5075         INIT_LIST_HEAD(&rec->list);
5076
5077         return rec;
5078 }
5079
5080 static int process_block_group_item(struct block_group_tree *block_group_cache,
5081                                     struct btrfs_key *key,
5082                                     struct extent_buffer *eb, int slot)
5083 {
5084         struct block_group_record *rec;
5085         int ret = 0;
5086
5087         rec = btrfs_new_block_group_record(eb, key, slot);
5088         ret = insert_block_group_record(block_group_cache, rec);
5089         if (ret) {
5090                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5091                         rec->objectid, rec->offset);
5092                 free(rec);
5093         }
5094
5095         return ret;
5096 }
5097
5098 struct device_extent_record *
5099 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5100                                struct btrfs_key *key, int slot)
5101 {
5102         struct device_extent_record *rec;
5103         struct btrfs_dev_extent *ptr;
5104
5105         rec = calloc(1, sizeof(*rec));
5106         if (!rec) {
5107                 fprintf(stderr, "memory allocation failed\n");
5108                 exit(-1);
5109         }
5110
5111         rec->cache.objectid = key->objectid;
5112         rec->cache.start = key->offset;
5113
5114         rec->generation = btrfs_header_generation(leaf);
5115
5116         rec->objectid = key->objectid;
5117         rec->type = key->type;
5118         rec->offset = key->offset;
5119
5120         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5121         rec->chunk_objecteid =
5122                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5123         rec->chunk_offset =
5124                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5125         rec->length = btrfs_dev_extent_length(leaf, ptr);
5126         rec->cache.size = rec->length;
5127
5128         INIT_LIST_HEAD(&rec->chunk_list);
5129         INIT_LIST_HEAD(&rec->device_list);
5130
5131         return rec;
5132 }
5133
5134 static int
5135 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5136                            struct btrfs_key *key, struct extent_buffer *eb,
5137                            int slot)
5138 {
5139         struct device_extent_record *rec;
5140         int ret;
5141
5142         rec = btrfs_new_device_extent_record(eb, key, slot);
5143         ret = insert_device_extent_record(dev_extent_cache, rec);
5144         if (ret) {
5145                 fprintf(stderr,
5146                         "Device extent[%llu, %llu, %llu] existed.\n",
5147                         rec->objectid, rec->offset, rec->length);
5148                 free(rec);
5149         }
5150
5151         return ret;
5152 }
5153
5154 static int process_extent_item(struct btrfs_root *root,
5155                                struct cache_tree *extent_cache,
5156                                struct extent_buffer *eb, int slot)
5157 {
5158         struct btrfs_extent_item *ei;
5159         struct btrfs_extent_inline_ref *iref;
5160         struct btrfs_extent_data_ref *dref;
5161         struct btrfs_shared_data_ref *sref;
5162         struct btrfs_key key;
5163         unsigned long end;
5164         unsigned long ptr;
5165         int type;
5166         u32 item_size = btrfs_item_size_nr(eb, slot);
5167         u64 refs = 0;
5168         u64 offset;
5169         u64 num_bytes;
5170         int metadata = 0;
5171
5172         btrfs_item_key_to_cpu(eb, &key, slot);
5173
5174         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5175                 metadata = 1;
5176                 num_bytes = root->leafsize;
5177         } else {
5178                 num_bytes = key.offset;
5179         }
5180
5181         if (item_size < sizeof(*ei)) {
5182 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5183                 struct btrfs_extent_item_v0 *ei0;
5184                 BUG_ON(item_size != sizeof(*ei0));
5185                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5186                 refs = btrfs_extent_refs_v0(eb, ei0);
5187 #else
5188                 BUG();
5189 #endif
5190                 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5191                                       num_bytes, refs, 0, 0, 0, metadata, 1,
5192                                       num_bytes);
5193         }
5194
5195         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5196         refs = btrfs_extent_refs(eb, ei);
5197         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5198                 metadata = 1;
5199         else
5200                 metadata = 0;
5201
5202         add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5203                        refs, 0, 0, 0, metadata, 1, num_bytes);
5204
5205         ptr = (unsigned long)(ei + 1);
5206         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5207             key.type == BTRFS_EXTENT_ITEM_KEY)
5208                 ptr += sizeof(struct btrfs_tree_block_info);
5209
5210         end = (unsigned long)ei + item_size;
5211         while (ptr < end) {
5212                 iref = (struct btrfs_extent_inline_ref *)ptr;
5213                 type = btrfs_extent_inline_ref_type(eb, iref);
5214                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5215                 switch (type) {
5216                 case BTRFS_TREE_BLOCK_REF_KEY:
5217                         add_tree_backref(extent_cache, key.objectid,
5218                                          0, offset, 0);
5219                         break;
5220                 case BTRFS_SHARED_BLOCK_REF_KEY:
5221                         add_tree_backref(extent_cache, key.objectid,
5222                                          offset, 0, 0);
5223                         break;
5224                 case BTRFS_EXTENT_DATA_REF_KEY:
5225                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5226                         add_data_backref(extent_cache, key.objectid, 0,
5227                                         btrfs_extent_data_ref_root(eb, dref),
5228                                         btrfs_extent_data_ref_objectid(eb,
5229                                                                        dref),
5230                                         btrfs_extent_data_ref_offset(eb, dref),
5231                                         btrfs_extent_data_ref_count(eb, dref),
5232                                         0, num_bytes);
5233                         break;
5234                 case BTRFS_SHARED_DATA_REF_KEY:
5235                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5236                         add_data_backref(extent_cache, key.objectid, offset,
5237                                         0, 0, 0,
5238                                         btrfs_shared_data_ref_count(eb, sref),
5239                                         0, num_bytes);
5240                         break;
5241                 default:
5242                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5243                                 key.objectid, key.type, num_bytes);
5244                         goto out;
5245                 }
5246                 ptr += btrfs_extent_inline_ref_size(type);
5247         }
5248         WARN_ON(ptr > end);
5249 out:
5250         return 0;
5251 }
5252
5253 static int check_cache_range(struct btrfs_root *root,
5254                              struct btrfs_block_group_cache *cache,
5255                              u64 offset, u64 bytes)
5256 {
5257         struct btrfs_free_space *entry;
5258         u64 *logical;
5259         u64 bytenr;
5260         int stripe_len;
5261         int i, nr, ret;
5262
5263         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5264                 bytenr = btrfs_sb_offset(i);
5265                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5266                                        cache->key.objectid, bytenr, 0,
5267                                        &logical, &nr, &stripe_len);
5268                 if (ret)
5269                         return ret;
5270
5271                 while (nr--) {
5272                         if (logical[nr] + stripe_len <= offset)
5273                                 continue;
5274                         if (offset + bytes <= logical[nr])
5275                                 continue;
5276                         if (logical[nr] == offset) {
5277                                 if (stripe_len >= bytes) {
5278                                         kfree(logical);
5279                                         return 0;
5280                                 }
5281                                 bytes -= stripe_len;
5282                                 offset += stripe_len;
5283                         } else if (logical[nr] < offset) {
5284                                 if (logical[nr] + stripe_len >=
5285                                     offset + bytes) {
5286                                         kfree(logical);
5287                                         return 0;
5288                                 }
5289                                 bytes = (offset + bytes) -
5290                                         (logical[nr] + stripe_len);
5291                                 offset = logical[nr] + stripe_len;
5292                         } else {
5293                                 /*
5294                                  * Could be tricky, the super may land in the
5295                                  * middle of the area we're checking.  First
5296                                  * check the easiest case, it's at the end.
5297                                  */
5298                                 if (logical[nr] + stripe_len >=
5299                                     bytes + offset) {
5300                                         bytes = logical[nr] - offset;
5301                                         continue;
5302                                 }
5303
5304                                 /* Check the left side */
5305                                 ret = check_cache_range(root, cache,
5306                                                         offset,
5307                                                         logical[nr] - offset);
5308                                 if (ret) {
5309                                         kfree(logical);
5310                                         return ret;
5311                                 }
5312
5313                                 /* Now we continue with the right side */
5314                                 bytes = (offset + bytes) -
5315                                         (logical[nr] + stripe_len);
5316                                 offset = logical[nr] + stripe_len;
5317                         }
5318                 }
5319
5320                 kfree(logical);
5321         }
5322
5323         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5324         if (!entry) {
5325                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5326                         offset, offset+bytes);
5327                 return -EINVAL;
5328         }
5329
5330         if (entry->offset != offset) {
5331                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5332                         entry->offset);
5333                 return -EINVAL;
5334         }
5335
5336         if (entry->bytes != bytes) {
5337                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5338                         bytes, entry->bytes, offset);
5339                 return -EINVAL;
5340         }
5341
5342         unlink_free_space(cache->free_space_ctl, entry);
5343         free(entry);
5344         return 0;
5345 }
5346
5347 static int verify_space_cache(struct btrfs_root *root,
5348                               struct btrfs_block_group_cache *cache)
5349 {
5350         struct btrfs_path *path;
5351         struct extent_buffer *leaf;
5352         struct btrfs_key key;
5353         u64 last;
5354         int ret = 0;
5355
5356         path = btrfs_alloc_path();
5357         if (!path)
5358                 return -ENOMEM;
5359
5360         root = root->fs_info->extent_root;
5361
5362         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5363
5364         key.objectid = last;
5365         key.offset = 0;
5366         key.type = BTRFS_EXTENT_ITEM_KEY;
5367
5368         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5369         if (ret < 0)
5370                 goto out;
5371         ret = 0;
5372         while (1) {
5373                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5374                         ret = btrfs_next_leaf(root, path);
5375                         if (ret < 0)
5376                                 goto out;
5377                         if (ret > 0) {
5378                                 ret = 0;
5379                                 break;
5380                         }
5381                 }
5382                 leaf = path->nodes[0];
5383                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5384                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5385                         break;
5386                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5387                     key.type != BTRFS_METADATA_ITEM_KEY) {
5388                         path->slots[0]++;
5389                         continue;
5390                 }
5391
5392                 if (last == key.objectid) {
5393                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5394                                 last = key.objectid + key.offset;
5395                         else
5396                                 last = key.objectid + root->leafsize;
5397                         path->slots[0]++;
5398                         continue;
5399                 }
5400
5401                 ret = check_cache_range(root, cache, last,
5402                                         key.objectid - last);
5403                 if (ret)
5404                         break;
5405                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5406                         last = key.objectid + key.offset;
5407                 else
5408                         last = key.objectid + root->leafsize;
5409                 path->slots[0]++;
5410         }
5411
5412         if (last < cache->key.objectid + cache->key.offset)
5413                 ret = check_cache_range(root, cache, last,
5414                                         cache->key.objectid +
5415                                         cache->key.offset - last);
5416
5417 out:
5418         btrfs_free_path(path);
5419
5420         if (!ret &&
5421             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5422                 fprintf(stderr, "There are still entries left in the space "
5423                         "cache\n");
5424                 ret = -EINVAL;
5425         }
5426
5427         return ret;
5428 }
5429
5430 static int check_space_cache(struct btrfs_root *root)
5431 {
5432         struct btrfs_block_group_cache *cache;
5433         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5434         int ret;
5435         int error = 0;
5436
5437         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5438             btrfs_super_generation(root->fs_info->super_copy) !=
5439             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5440                 printf("cache and super generation don't match, space cache "
5441                        "will be invalidated\n");
5442                 return 0;
5443         }
5444
5445         if (ctx.progress_enabled) {
5446                 ctx.tp = TASK_FREE_SPACE;
5447                 task_start(ctx.info);
5448         }
5449
5450         while (1) {
5451                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5452                 if (!cache)
5453                         break;
5454
5455                 start = cache->key.objectid + cache->key.offset;
5456                 if (!cache->free_space_ctl) {
5457                         if (btrfs_init_free_space_ctl(cache,
5458                                                       root->sectorsize)) {
5459                                 ret = -ENOMEM;
5460                                 break;
5461                         }
5462                 } else {
5463                         btrfs_remove_free_space_cache(cache);
5464                 }
5465
5466                 ret = load_free_space_cache(root->fs_info, cache);
5467                 if (!ret)
5468                         continue;
5469
5470                 ret = verify_space_cache(root, cache);
5471                 if (ret) {
5472                         fprintf(stderr, "cache appears valid but isnt %Lu\n",
5473                                 cache->key.objectid);
5474                         error++;
5475                 }
5476         }
5477
5478         task_stop(ctx.info);
5479
5480         return error ? -EINVAL : 0;
5481 }
5482
5483 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5484                         u64 num_bytes, unsigned long leaf_offset,
5485                         struct extent_buffer *eb) {
5486
5487         u64 offset = 0;
5488         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5489         char *data;
5490         unsigned long csum_offset;
5491         u32 csum;
5492         u32 csum_expected;
5493         u64 read_len;
5494         u64 data_checked = 0;
5495         u64 tmp;
5496         int ret = 0;
5497         int mirror;
5498         int num_copies;
5499
5500         if (num_bytes % root->sectorsize)
5501                 return -EINVAL;
5502
5503         data = malloc(num_bytes);
5504         if (!data)
5505                 return -ENOMEM;
5506
5507         while (offset < num_bytes) {
5508                 mirror = 0;
5509 again:
5510                 read_len = num_bytes - offset;
5511                 /* read as much space once a time */
5512                 ret = read_extent_data(root, data + offset,
5513                                 bytenr + offset, &read_len, mirror);
5514                 if (ret)
5515                         goto out;
5516                 data_checked = 0;
5517                 /* verify every 4k data's checksum */
5518                 while (data_checked < read_len) {
5519                         csum = ~(u32)0;
5520                         tmp = offset + data_checked;
5521
5522                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5523                                                csum, root->sectorsize);
5524                         btrfs_csum_final(csum, (char *)&csum);
5525
5526                         csum_offset = leaf_offset +
5527                                  tmp / root->sectorsize * csum_size;
5528                         read_extent_buffer(eb, (char *)&csum_expected,
5529                                            csum_offset, csum_size);
5530                         /* try another mirror */
5531                         if (csum != csum_expected) {
5532                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5533                                                 mirror, bytenr + tmp,
5534                                                 csum, csum_expected);
5535                                 num_copies = btrfs_num_copies(
5536                                                 &root->fs_info->mapping_tree,
5537                                                 bytenr, num_bytes);
5538                                 if (mirror < num_copies - 1) {
5539                                         mirror += 1;
5540                                         goto again;
5541                                 }
5542                         }
5543                         data_checked += root->sectorsize;
5544                 }
5545                 offset += read_len;
5546         }
5547 out:
5548         free(data);
5549         return ret;
5550 }
5551
5552 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5553                                u64 num_bytes)
5554 {
5555         struct btrfs_path *path;
5556         struct extent_buffer *leaf;
5557         struct btrfs_key key;
5558         int ret;
5559
5560         path = btrfs_alloc_path();
5561         if (!path) {
5562                 fprintf(stderr, "Error allocing path\n");
5563                 return -ENOMEM;
5564         }
5565
5566         key.objectid = bytenr;
5567         key.type = BTRFS_EXTENT_ITEM_KEY;
5568         key.offset = (u64)-1;
5569
5570 again:
5571         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5572                                 0, 0);
5573         if (ret < 0) {
5574                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5575                 btrfs_free_path(path);
5576                 return ret;
5577         } else if (ret) {
5578                 if (path->slots[0] > 0) {
5579                         path->slots[0]--;
5580                 } else {
5581                         ret = btrfs_prev_leaf(root, path);
5582                         if (ret < 0) {
5583                                 goto out;
5584                         } else if (ret > 0) {
5585                                 ret = 0;
5586                                 goto out;
5587                         }
5588                 }
5589         }
5590
5591         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5592
5593         /*
5594          * Block group items come before extent items if they have the same
5595          * bytenr, so walk back one more just in case.  Dear future traveler,
5596          * first congrats on mastering time travel.  Now if it's not too much
5597          * trouble could you go back to 2006 and tell Chris to make the
5598          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5599          * EXTENT_ITEM_KEY please?
5600          */
5601         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5602                 if (path->slots[0] > 0) {
5603                         path->slots[0]--;
5604                 } else {
5605                         ret = btrfs_prev_leaf(root, path);
5606                         if (ret < 0) {
5607                                 goto out;
5608                         } else if (ret > 0) {
5609                                 ret = 0;
5610                                 goto out;
5611                         }
5612                 }
5613                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5614         }
5615
5616         while (num_bytes) {
5617                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5618                         ret = btrfs_next_leaf(root, path);
5619                         if (ret < 0) {
5620                                 fprintf(stderr, "Error going to next leaf "
5621                                         "%d\n", ret);
5622                                 btrfs_free_path(path);
5623                                 return ret;
5624                         } else if (ret) {
5625                                 break;
5626                         }
5627                 }
5628                 leaf = path->nodes[0];
5629                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5630                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5631                         path->slots[0]++;
5632                         continue;
5633                 }
5634                 if (key.objectid + key.offset < bytenr) {
5635                         path->slots[0]++;
5636                         continue;
5637                 }
5638                 if (key.objectid > bytenr + num_bytes)
5639                         break;
5640
5641                 if (key.objectid == bytenr) {
5642                         if (key.offset >= num_bytes) {
5643                                 num_bytes = 0;
5644                                 break;
5645                         }
5646                         num_bytes -= key.offset;
5647                         bytenr += key.offset;
5648                 } else if (key.objectid < bytenr) {
5649                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5650                                 num_bytes = 0;
5651                                 break;
5652                         }
5653                         num_bytes = (bytenr + num_bytes) -
5654                                 (key.objectid + key.offset);
5655                         bytenr = key.objectid + key.offset;
5656                 } else {
5657                         if (key.objectid + key.offset < bytenr + num_bytes) {
5658                                 u64 new_start = key.objectid + key.offset;
5659                                 u64 new_bytes = bytenr + num_bytes - new_start;
5660
5661                                 /*
5662                                  * Weird case, the extent is in the middle of
5663                                  * our range, we'll have to search one side
5664                                  * and then the other.  Not sure if this happens
5665                                  * in real life, but no harm in coding it up
5666                                  * anyway just in case.
5667                                  */
5668                                 btrfs_release_path(path);
5669                                 ret = check_extent_exists(root, new_start,
5670                                                           new_bytes);
5671                                 if (ret) {
5672                                         fprintf(stderr, "Right section didn't "
5673                                                 "have a record\n");
5674                                         break;
5675                                 }
5676                                 num_bytes = key.objectid - bytenr;
5677                                 goto again;
5678                         }
5679                         num_bytes = key.objectid - bytenr;
5680                 }
5681                 path->slots[0]++;
5682         }
5683         ret = 0;
5684
5685 out:
5686         if (num_bytes && !ret) {
5687                 fprintf(stderr, "There are no extents for csum range "
5688                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5689                 ret = 1;
5690         }
5691
5692         btrfs_free_path(path);
5693         return ret;
5694 }
5695
5696 static int check_csums(struct btrfs_root *root)
5697 {
5698         struct btrfs_path *path;
5699         struct extent_buffer *leaf;
5700         struct btrfs_key key;
5701         u64 offset = 0, num_bytes = 0;
5702         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5703         int errors = 0;
5704         int ret;
5705         u64 data_len;
5706         unsigned long leaf_offset;
5707
5708         root = root->fs_info->csum_root;
5709         if (!extent_buffer_uptodate(root->node)) {
5710                 fprintf(stderr, "No valid csum tree found\n");
5711                 return -ENOENT;
5712         }
5713
5714         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5715         key.type = BTRFS_EXTENT_CSUM_KEY;
5716         key.offset = 0;
5717
5718         path = btrfs_alloc_path();
5719         if (!path)
5720                 return -ENOMEM;
5721
5722         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5723         if (ret < 0) {
5724                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5725                 btrfs_free_path(path);
5726                 return ret;
5727         }
5728
5729         if (ret > 0 && path->slots[0])
5730                 path->slots[0]--;
5731         ret = 0;
5732
5733         while (1) {
5734                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5735                         ret = btrfs_next_leaf(root, path);
5736                         if (ret < 0) {
5737                                 fprintf(stderr, "Error going to next leaf "
5738                                         "%d\n", ret);
5739                                 break;
5740                         }
5741                         if (ret)
5742                                 break;
5743                 }
5744                 leaf = path->nodes[0];
5745
5746                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5747                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5748                         path->slots[0]++;
5749                         continue;
5750                 }
5751
5752                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5753                               csum_size) * root->sectorsize;
5754                 if (!check_data_csum)
5755                         goto skip_csum_check;
5756                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5757                 ret = check_extent_csums(root, key.offset, data_len,
5758                                          leaf_offset, leaf);
5759                 if (ret)
5760                         break;
5761 skip_csum_check:
5762                 if (!num_bytes) {
5763                         offset = key.offset;
5764                 } else if (key.offset != offset + num_bytes) {
5765                         ret = check_extent_exists(root, offset, num_bytes);
5766                         if (ret) {
5767                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5768                                         "there is no extent record\n",
5769                                         offset, offset+num_bytes);
5770                                 errors++;
5771                         }
5772                         offset = key.offset;
5773                         num_bytes = 0;
5774                 }
5775                 num_bytes += data_len;
5776                 path->slots[0]++;
5777         }
5778
5779         btrfs_free_path(path);
5780         return errors;
5781 }
5782
5783 static int is_dropped_key(struct btrfs_key *key,
5784                           struct btrfs_key *drop_key) {
5785         if (key->objectid < drop_key->objectid)
5786                 return 1;
5787         else if (key->objectid == drop_key->objectid) {
5788                 if (key->type < drop_key->type)
5789                         return 1;
5790                 else if (key->type == drop_key->type) {
5791                         if (key->offset < drop_key->offset)
5792                                 return 1;
5793                 }
5794         }
5795         return 0;
5796 }
5797
5798 /*
5799  * Here are the rules for FULL_BACKREF.
5800  *
5801  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5802  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5803  *      FULL_BACKREF set.
5804  * 3) We cow'ed the block walking down a reloc tree.  This is impossible to tell
5805  *    if it happened after the relocation occurred since we'll have dropped the
5806  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5807  *    have no real way to know for sure.
5808  *
5809  * We process the blocks one root at a time, and we start from the lowest root
5810  * objectid and go to the highest.  So we can just lookup the owner backref for
5811  * the record and if we don't find it then we know it doesn't exist and we have
5812  * a FULL BACKREF.
5813  *
5814  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5815  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5816  * be set or not and then we can check later once we've gathered all the refs.
5817  */
5818 static int calc_extent_flag(struct btrfs_root *root,
5819                            struct cache_tree *extent_cache,
5820                            struct extent_buffer *buf,
5821                            struct root_item_record *ri,
5822                            u64 *flags)
5823 {
5824         struct extent_record *rec;
5825         struct cache_extent *cache;
5826         struct tree_backref *tback;
5827         u64 owner = 0;
5828
5829         cache = lookup_cache_extent(extent_cache, buf->start, 1);
5830         /* we have added this extent before */
5831         BUG_ON(!cache);
5832         rec = container_of(cache, struct extent_record, cache);
5833
5834         /*
5835          * Except file/reloc tree, we can not have
5836          * FULL BACKREF MODE
5837          */
5838         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5839                 goto normal;
5840         /*
5841          * root node
5842          */
5843         if (buf->start == ri->bytenr)
5844                 goto normal;
5845
5846         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5847                 goto full_backref;
5848
5849         owner = btrfs_header_owner(buf);
5850         if (owner == ri->objectid)
5851                 goto normal;
5852
5853         tback = find_tree_backref(rec, 0, owner);
5854         if (!tback)
5855                 goto full_backref;
5856 normal:
5857         *flags = 0;
5858         if (rec->flag_block_full_backref != -1 &&
5859             rec->flag_block_full_backref != 0)
5860                 rec->bad_full_backref = 1;
5861         return 0;
5862 full_backref:
5863         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5864         if (rec->flag_block_full_backref != -1 &&
5865             rec->flag_block_full_backref != 1)
5866                 rec->bad_full_backref = 1;
5867         return 0;
5868 }
5869
5870 static int run_next_block(struct btrfs_root *root,
5871                           struct block_info *bits,
5872                           int bits_nr,
5873                           u64 *last,
5874                           struct cache_tree *pending,
5875                           struct cache_tree *seen,
5876                           struct cache_tree *reada,
5877                           struct cache_tree *nodes,
5878                           struct cache_tree *extent_cache,
5879                           struct cache_tree *chunk_cache,
5880                           struct rb_root *dev_cache,
5881                           struct block_group_tree *block_group_cache,
5882                           struct device_extent_tree *dev_extent_cache,
5883                           struct root_item_record *ri)
5884 {
5885         struct extent_buffer *buf;
5886         struct extent_record *rec = NULL;
5887         u64 bytenr;
5888         u32 size;
5889         u64 parent;
5890         u64 owner;
5891         u64 flags;
5892         u64 ptr;
5893         u64 gen = 0;
5894         int ret = 0;
5895         int i;
5896         int nritems;
5897         struct btrfs_key key;
5898         struct cache_extent *cache;
5899         int reada_bits;
5900
5901         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5902                                     bits_nr, &reada_bits);
5903         if (nritems == 0)
5904                 return 1;
5905
5906         if (!reada_bits) {
5907                 for(i = 0; i < nritems; i++) {
5908                         ret = add_cache_extent(reada, bits[i].start,
5909                                                bits[i].size);
5910                         if (ret == -EEXIST)
5911                                 continue;
5912
5913                         /* fixme, get the parent transid */
5914                         readahead_tree_block(root, bits[i].start,
5915                                              bits[i].size, 0);
5916                 }
5917         }
5918         *last = bits[0].start;
5919         bytenr = bits[0].start;
5920         size = bits[0].size;
5921
5922         cache = lookup_cache_extent(pending, bytenr, size);
5923         if (cache) {
5924                 remove_cache_extent(pending, cache);
5925                 free(cache);
5926         }
5927         cache = lookup_cache_extent(reada, bytenr, size);
5928         if (cache) {
5929                 remove_cache_extent(reada, cache);
5930                 free(cache);
5931         }
5932         cache = lookup_cache_extent(nodes, bytenr, size);
5933         if (cache) {
5934                 remove_cache_extent(nodes, cache);
5935                 free(cache);
5936         }
5937         cache = lookup_cache_extent(extent_cache, bytenr, size);
5938         if (cache) {
5939                 rec = container_of(cache, struct extent_record, cache);
5940                 gen = rec->parent_generation;
5941         }
5942
5943         /* fixme, get the real parent transid */
5944         buf = read_tree_block(root, bytenr, size, gen);
5945         if (!extent_buffer_uptodate(buf)) {
5946                 record_bad_block_io(root->fs_info,
5947                                     extent_cache, bytenr, size);
5948                 goto out;
5949         }
5950
5951         nritems = btrfs_header_nritems(buf);
5952
5953         flags = 0;
5954         if (!init_extent_tree) {
5955                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5956                                        btrfs_header_level(buf), 1, NULL,
5957                                        &flags);
5958                 if (ret < 0) {
5959                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5960                         if (ret < 0) {
5961                                 fprintf(stderr, "Couldn't calc extent flags\n");
5962                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5963                         }
5964                 }
5965         } else {
5966                 flags = 0;
5967                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5968                 if (ret < 0) {
5969                         fprintf(stderr, "Couldn't calc extent flags\n");
5970                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5971                 }
5972         }
5973
5974         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5975                 if (ri != NULL &&
5976                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5977                     ri->objectid == btrfs_header_owner(buf)) {
5978                         /*
5979                          * Ok we got to this block from it's original owner and
5980                          * we have FULL_BACKREF set.  Relocation can leave
5981                          * converted blocks over so this is altogether possible,
5982                          * however it's not possible if the generation > the
5983                          * last snapshot, so check for this case.
5984                          */
5985                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5986                             btrfs_header_generation(buf) > ri->last_snapshot) {
5987                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5988                                 rec->bad_full_backref = 1;
5989                         }
5990                 }
5991         } else {
5992                 if (ri != NULL &&
5993                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5994                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5995                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5996                         rec->bad_full_backref = 1;
5997                 }
5998         }
5999
6000         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6001                 rec->flag_block_full_backref = 1;
6002                 parent = bytenr;
6003                 owner = 0;
6004         } else {
6005                 rec->flag_block_full_backref = 0;
6006                 parent = 0;
6007                 owner = btrfs_header_owner(buf);
6008         }
6009
6010         ret = check_block(root, extent_cache, buf, flags);
6011         if (ret)
6012                 goto out;
6013
6014         if (btrfs_is_leaf(buf)) {
6015                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6016                 for (i = 0; i < nritems; i++) {
6017                         struct btrfs_file_extent_item *fi;
6018                         btrfs_item_key_to_cpu(buf, &key, i);
6019                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6020                                 process_extent_item(root, extent_cache, buf,
6021                                                     i);
6022                                 continue;
6023                         }
6024                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6025                                 process_extent_item(root, extent_cache, buf,
6026                                                     i);
6027                                 continue;
6028                         }
6029                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6030                                 total_csum_bytes +=
6031                                         btrfs_item_size_nr(buf, i);
6032                                 continue;
6033                         }
6034                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6035                                 process_chunk_item(chunk_cache, &key, buf, i);
6036                                 continue;
6037                         }
6038                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6039                                 process_device_item(dev_cache, &key, buf, i);
6040                                 continue;
6041                         }
6042                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6043                                 process_block_group_item(block_group_cache,
6044                                         &key, buf, i);
6045                                 continue;
6046                         }
6047                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6048                                 process_device_extent_item(dev_extent_cache,
6049                                         &key, buf, i);
6050                                 continue;
6051
6052                         }
6053                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6054 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6055                                 process_extent_ref_v0(extent_cache, buf, i);
6056 #else
6057                                 BUG();
6058 #endif
6059                                 continue;
6060                         }
6061
6062                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6063                                 add_tree_backref(extent_cache, key.objectid, 0,
6064                                                  key.offset, 0);
6065                                 continue;
6066                         }
6067                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6068                                 add_tree_backref(extent_cache, key.objectid,
6069                                                  key.offset, 0, 0);
6070                                 continue;
6071                         }
6072                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6073                                 struct btrfs_extent_data_ref *ref;
6074                                 ref = btrfs_item_ptr(buf, i,
6075                                                 struct btrfs_extent_data_ref);
6076                                 add_data_backref(extent_cache,
6077                                         key.objectid, 0,
6078                                         btrfs_extent_data_ref_root(buf, ref),
6079                                         btrfs_extent_data_ref_objectid(buf,
6080                                                                        ref),
6081                                         btrfs_extent_data_ref_offset(buf, ref),
6082                                         btrfs_extent_data_ref_count(buf, ref),
6083                                         0, root->sectorsize);
6084                                 continue;
6085                         }
6086                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6087                                 struct btrfs_shared_data_ref *ref;
6088                                 ref = btrfs_item_ptr(buf, i,
6089                                                 struct btrfs_shared_data_ref);
6090                                 add_data_backref(extent_cache,
6091                                         key.objectid, key.offset, 0, 0, 0,
6092                                         btrfs_shared_data_ref_count(buf, ref),
6093                                         0, root->sectorsize);
6094                                 continue;
6095                         }
6096                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6097                                 struct bad_item *bad;
6098
6099                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6100                                         continue;
6101                                 if (!owner)
6102                                         continue;
6103                                 bad = malloc(sizeof(struct bad_item));
6104                                 if (!bad)
6105                                         continue;
6106                                 INIT_LIST_HEAD(&bad->list);
6107                                 memcpy(&bad->key, &key,
6108                                        sizeof(struct btrfs_key));
6109                                 bad->root_id = owner;
6110                                 list_add_tail(&bad->list, &delete_items);
6111                                 continue;
6112                         }
6113                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6114                                 continue;
6115                         fi = btrfs_item_ptr(buf, i,
6116                                             struct btrfs_file_extent_item);
6117                         if (btrfs_file_extent_type(buf, fi) ==
6118                             BTRFS_FILE_EXTENT_INLINE)
6119                                 continue;
6120                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6121                                 continue;
6122
6123                         data_bytes_allocated +=
6124                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6125                         if (data_bytes_allocated < root->sectorsize) {
6126                                 abort();
6127                         }
6128                         data_bytes_referenced +=
6129                                 btrfs_file_extent_num_bytes(buf, fi);
6130                         add_data_backref(extent_cache,
6131                                 btrfs_file_extent_disk_bytenr(buf, fi),
6132                                 parent, owner, key.objectid, key.offset -
6133                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6134                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6135                 }
6136         } else {
6137                 int level;
6138                 struct btrfs_key first_key;
6139
6140                 first_key.objectid = 0;
6141
6142                 if (nritems > 0)
6143                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6144                 level = btrfs_header_level(buf);
6145                 for (i = 0; i < nritems; i++) {
6146                         ptr = btrfs_node_blockptr(buf, i);
6147                         size = btrfs_level_size(root, level - 1);
6148                         btrfs_node_key_to_cpu(buf, &key, i);
6149                         if (ri != NULL) {
6150                                 if ((level == ri->drop_level)
6151                                     && is_dropped_key(&key, &ri->drop_key)) {
6152                                         continue;
6153                                 }
6154                         }
6155                         ret = add_extent_rec(extent_cache, &key,
6156                                              btrfs_node_ptr_generation(buf, i),
6157                                              ptr, size, 0, 0, 1, 0, 1, 0,
6158                                              size);
6159                         BUG_ON(ret);
6160
6161                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6162
6163                         if (level > 1) {
6164                                 add_pending(nodes, seen, ptr, size);
6165                         } else {
6166                                 add_pending(pending, seen, ptr, size);
6167                         }
6168                 }
6169                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6170                                       nritems) * sizeof(struct btrfs_key_ptr);
6171         }
6172         total_btree_bytes += buf->len;
6173         if (fs_root_objectid(btrfs_header_owner(buf)))
6174                 total_fs_tree_bytes += buf->len;
6175         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6176                 total_extent_tree_bytes += buf->len;
6177         if (!found_old_backref &&
6178             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6179             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6180             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6181                 found_old_backref = 1;
6182 out:
6183         free_extent_buffer(buf);
6184         return ret;
6185 }
6186
6187 static int add_root_to_pending(struct extent_buffer *buf,
6188                                struct cache_tree *extent_cache,
6189                                struct cache_tree *pending,
6190                                struct cache_tree *seen,
6191                                struct cache_tree *nodes,
6192                                u64 objectid)
6193 {
6194         if (btrfs_header_level(buf) > 0)
6195                 add_pending(nodes, seen, buf->start, buf->len);
6196         else
6197                 add_pending(pending, seen, buf->start, buf->len);
6198         add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6199                        0, 1, 1, 0, 1, 0, buf->len);
6200
6201         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6202             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6203                 add_tree_backref(extent_cache, buf->start, buf->start,
6204                                  0, 1);
6205         else
6206                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6207         return 0;
6208 }
6209
6210 /* as we fix the tree, we might be deleting blocks that
6211  * we're tracking for repair.  This hook makes sure we
6212  * remove any backrefs for blocks as we are fixing them.
6213  */
6214 static int free_extent_hook(struct btrfs_trans_handle *trans,
6215                             struct btrfs_root *root,
6216                             u64 bytenr, u64 num_bytes, u64 parent,
6217                             u64 root_objectid, u64 owner, u64 offset,
6218                             int refs_to_drop)
6219 {
6220         struct extent_record *rec;
6221         struct cache_extent *cache;
6222         int is_data;
6223         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6224
6225         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6226         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6227         if (!cache)
6228                 return 0;
6229
6230         rec = container_of(cache, struct extent_record, cache);
6231         if (is_data) {
6232                 struct data_backref *back;
6233                 back = find_data_backref(rec, parent, root_objectid, owner,
6234                                          offset, 1, bytenr, num_bytes);
6235                 if (!back)
6236                         goto out;
6237                 if (back->node.found_ref) {
6238                         back->found_ref -= refs_to_drop;
6239                         if (rec->refs)
6240                                 rec->refs -= refs_to_drop;
6241                 }
6242                 if (back->node.found_extent_tree) {
6243                         back->num_refs -= refs_to_drop;
6244                         if (rec->extent_item_refs)
6245                                 rec->extent_item_refs -= refs_to_drop;
6246                 }
6247                 if (back->found_ref == 0)
6248                         back->node.found_ref = 0;
6249                 if (back->num_refs == 0)
6250                         back->node.found_extent_tree = 0;
6251
6252                 if (!back->node.found_extent_tree && back->node.found_ref) {
6253                         list_del(&back->node.list);
6254                         free(back);
6255                 }
6256         } else {
6257                 struct tree_backref *back;
6258                 back = find_tree_backref(rec, parent, root_objectid);
6259                 if (!back)
6260                         goto out;
6261                 if (back->node.found_ref) {
6262                         if (rec->refs)
6263                                 rec->refs--;
6264                         back->node.found_ref = 0;
6265                 }
6266                 if (back->node.found_extent_tree) {
6267                         if (rec->extent_item_refs)
6268                                 rec->extent_item_refs--;
6269                         back->node.found_extent_tree = 0;
6270                 }
6271                 if (!back->node.found_extent_tree && back->node.found_ref) {
6272                         list_del(&back->node.list);
6273                         free(back);
6274                 }
6275         }
6276         maybe_free_extent_rec(extent_cache, rec);
6277 out:
6278         return 0;
6279 }
6280
6281 static int delete_extent_records(struct btrfs_trans_handle *trans,
6282                                  struct btrfs_root *root,
6283                                  struct btrfs_path *path,
6284                                  u64 bytenr, u64 new_len)
6285 {
6286         struct btrfs_key key;
6287         struct btrfs_key found_key;
6288         struct extent_buffer *leaf;
6289         int ret;
6290         int slot;
6291
6292
6293         key.objectid = bytenr;
6294         key.type = (u8)-1;
6295         key.offset = (u64)-1;
6296
6297         while(1) {
6298                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6299                                         &key, path, 0, 1);
6300                 if (ret < 0)
6301                         break;
6302
6303                 if (ret > 0) {
6304                         ret = 0;
6305                         if (path->slots[0] == 0)
6306                                 break;
6307                         path->slots[0]--;
6308                 }
6309                 ret = 0;
6310
6311                 leaf = path->nodes[0];
6312                 slot = path->slots[0];
6313
6314                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6315                 if (found_key.objectid != bytenr)
6316                         break;
6317
6318                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6319                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6320                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6321                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6322                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6323                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6324                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6325                         btrfs_release_path(path);
6326                         if (found_key.type == 0) {
6327                                 if (found_key.offset == 0)
6328                                         break;
6329                                 key.offset = found_key.offset - 1;
6330                                 key.type = found_key.type;
6331                         }
6332                         key.type = found_key.type - 1;
6333                         key.offset = (u64)-1;
6334                         continue;
6335                 }
6336
6337                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6338                         found_key.objectid, found_key.type, found_key.offset);
6339
6340                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6341                 if (ret)
6342                         break;
6343                 btrfs_release_path(path);
6344
6345                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6346                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6347                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6348                                 found_key.offset : root->leafsize;
6349
6350                         ret = btrfs_update_block_group(trans, root, bytenr,
6351                                                        bytes, 0, 0);
6352                         if (ret)
6353                                 break;
6354                 }
6355         }
6356
6357         btrfs_release_path(path);
6358         return ret;
6359 }
6360
6361 /*
6362  * for a single backref, this will allocate a new extent
6363  * and add the backref to it.
6364  */
6365 static int record_extent(struct btrfs_trans_handle *trans,
6366                          struct btrfs_fs_info *info,
6367                          struct btrfs_path *path,
6368                          struct extent_record *rec,
6369                          struct extent_backref *back,
6370                          int allocated, u64 flags)
6371 {
6372         int ret;
6373         struct btrfs_root *extent_root = info->extent_root;
6374         struct extent_buffer *leaf;
6375         struct btrfs_key ins_key;
6376         struct btrfs_extent_item *ei;
6377         struct tree_backref *tback;
6378         struct data_backref *dback;
6379         struct btrfs_tree_block_info *bi;
6380
6381         if (!back->is_data)
6382                 rec->max_size = max_t(u64, rec->max_size,
6383                                     info->extent_root->leafsize);
6384
6385         if (!allocated) {
6386                 u32 item_size = sizeof(*ei);
6387
6388                 if (!back->is_data)
6389                         item_size += sizeof(*bi);
6390
6391                 ins_key.objectid = rec->start;
6392                 ins_key.offset = rec->max_size;
6393                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6394
6395                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6396                                         &ins_key, item_size);
6397                 if (ret)
6398                         goto fail;
6399
6400                 leaf = path->nodes[0];
6401                 ei = btrfs_item_ptr(leaf, path->slots[0],
6402                                     struct btrfs_extent_item);
6403
6404                 btrfs_set_extent_refs(leaf, ei, 0);
6405                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6406
6407                 if (back->is_data) {
6408                         btrfs_set_extent_flags(leaf, ei,
6409                                                BTRFS_EXTENT_FLAG_DATA);
6410                 } else {
6411                         struct btrfs_disk_key copy_key;;
6412
6413                         tback = (struct tree_backref *)back;
6414                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6415                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6416                                              sizeof(*bi));
6417
6418                         btrfs_set_disk_key_objectid(&copy_key,
6419                                                     rec->info_objectid);
6420                         btrfs_set_disk_key_type(&copy_key, 0);
6421                         btrfs_set_disk_key_offset(&copy_key, 0);
6422
6423                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6424                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6425
6426                         btrfs_set_extent_flags(leaf, ei,
6427                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6428                 }
6429
6430                 btrfs_mark_buffer_dirty(leaf);
6431                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6432                                                rec->max_size, 1, 0);
6433                 if (ret)
6434                         goto fail;
6435                 btrfs_release_path(path);
6436         }
6437
6438         if (back->is_data) {
6439                 u64 parent;
6440                 int i;
6441
6442                 dback = (struct data_backref *)back;
6443                 if (back->full_backref)
6444                         parent = dback->parent;
6445                 else
6446                         parent = 0;
6447
6448                 for (i = 0; i < dback->found_ref; i++) {
6449                         /* if parent != 0, we're doing a full backref
6450                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6451                          * just makes the backref allocator create a data
6452                          * backref
6453                          */
6454                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6455                                                    rec->start, rec->max_size,
6456                                                    parent,
6457                                                    dback->root,
6458                                                    parent ?
6459                                                    BTRFS_FIRST_FREE_OBJECTID :
6460                                                    dback->owner,
6461                                                    dback->offset);
6462                         if (ret)
6463                                 break;
6464                 }
6465                 fprintf(stderr, "adding new data backref"
6466                                 " on %llu %s %llu owner %llu"
6467                                 " offset %llu found %d\n",
6468                                 (unsigned long long)rec->start,
6469                                 back->full_backref ?
6470                                 "parent" : "root",
6471                                 back->full_backref ?
6472                                 (unsigned long long)parent :
6473                                 (unsigned long long)dback->root,
6474                                 (unsigned long long)dback->owner,
6475                                 (unsigned long long)dback->offset,
6476                                 dback->found_ref);
6477         } else {
6478                 u64 parent;
6479
6480                 tback = (struct tree_backref *)back;
6481                 if (back->full_backref)
6482                         parent = tback->parent;
6483                 else
6484                         parent = 0;
6485
6486                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6487                                            rec->start, rec->max_size,
6488                                            parent, tback->root, 0, 0);
6489                 fprintf(stderr, "adding new tree backref on "
6490                         "start %llu len %llu parent %llu root %llu\n",
6491                         rec->start, rec->max_size, parent, tback->root);
6492         }
6493 fail:
6494         btrfs_release_path(path);
6495         return ret;
6496 }
6497
/*
 * One candidate (bytenr, bytes) pair for an extent, tallied while comparing
 * what the data backrefs of an extent record report about it.
 */
struct extent_entry {
        u64 bytenr;             /* extent start reported by the backrefs */
        u64 bytes;              /* extent length reported by the backrefs */
        int count;              /* number of backrefs reporting this pair */
        int broken;             /* how many of those are marked broken */
        struct list_head list;  /* link in the list of candidate entries */
};
6505
6506 static struct extent_entry *find_entry(struct list_head *entries,
6507                                        u64 bytenr, u64 bytes)
6508 {
6509         struct extent_entry *entry = NULL;
6510
6511         list_for_each_entry(entry, entries, list) {
6512                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6513                         return entry;
6514         }
6515
6516         return NULL;
6517 }
6518
6519 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6520 {
6521         struct extent_entry *entry, *best = NULL, *prev = NULL;
6522
6523         list_for_each_entry(entry, entries, list) {
6524                 if (!prev) {
6525                         prev = entry;
6526                         continue;
6527                 }
6528
6529                 /*
6530                  * If there are as many broken entries as entries then we know
6531                  * not to trust this particular entry.
6532                  */
6533                 if (entry->broken == entry->count)
6534                         continue;
6535
6536                 /*
6537                  * If our current entry == best then we can't be sure our best
6538                  * is really the best, so we need to keep searching.
6539                  */
6540                 if (best && best->count == entry->count) {
6541                         prev = entry;
6542                         best = NULL;
6543                         continue;
6544                 }
6545
6546                 /* Prev == entry, not good enough, have to keep searching */
6547                 if (!prev->broken && prev->count == entry->count)
6548                         continue;
6549
6550                 if (!best)
6551                         best = (prev->count > entry->count) ? prev : entry;
6552                 else if (best->count < entry->count)
6553                         best = entry;
6554                 prev = entry;
6555         }
6556
6557         return best;
6558 }
6559
6560 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6561                       struct data_backref *dback, struct extent_entry *entry)
6562 {
6563         struct btrfs_trans_handle *trans;
6564         struct btrfs_root *root;
6565         struct btrfs_file_extent_item *fi;
6566         struct extent_buffer *leaf;
6567         struct btrfs_key key;
6568         u64 bytenr, bytes;
6569         int ret, err;
6570
6571         key.objectid = dback->root;
6572         key.type = BTRFS_ROOT_ITEM_KEY;
6573         key.offset = (u64)-1;
6574         root = btrfs_read_fs_root(info, &key);
6575         if (IS_ERR(root)) {
6576                 fprintf(stderr, "Couldn't find root for our ref\n");
6577                 return -EINVAL;
6578         }
6579
6580         /*
6581          * The backref points to the original offset of the extent if it was
6582          * split, so we need to search down to the offset we have and then walk
6583          * forward until we find the backref we're looking for.
6584          */
6585         key.objectid = dback->owner;
6586         key.type = BTRFS_EXTENT_DATA_KEY;
6587         key.offset = dback->offset;
6588         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6589         if (ret < 0) {
6590                 fprintf(stderr, "Error looking up ref %d\n", ret);
6591                 return ret;
6592         }
6593
6594         while (1) {
6595                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6596                         ret = btrfs_next_leaf(root, path);
6597                         if (ret) {
6598                                 fprintf(stderr, "Couldn't find our ref, next\n");
6599                                 return -EINVAL;
6600                         }
6601                 }
6602                 leaf = path->nodes[0];
6603                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6604                 if (key.objectid != dback->owner ||
6605                     key.type != BTRFS_EXTENT_DATA_KEY) {
6606                         fprintf(stderr, "Couldn't find our ref, search\n");
6607                         return -EINVAL;
6608                 }
6609                 fi = btrfs_item_ptr(leaf, path->slots[0],
6610                                     struct btrfs_file_extent_item);
6611                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6612                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6613
6614                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6615                         break;
6616                 path->slots[0]++;
6617         }
6618
6619         btrfs_release_path(path);
6620
6621         trans = btrfs_start_transaction(root, 1);
6622         if (IS_ERR(trans))
6623                 return PTR_ERR(trans);
6624
6625         /*
6626          * Ok we have the key of the file extent we want to fix, now we can cow
6627          * down to the thing and fix it.
6628          */
6629         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6630         if (ret < 0) {
6631                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6632                         key.objectid, key.type, key.offset, ret);
6633                 goto out;
6634         }
6635         if (ret > 0) {
6636                 fprintf(stderr, "Well that's odd, we just found this key "
6637                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6638                         key.offset);
6639                 ret = -EINVAL;
6640                 goto out;
6641         }
6642         leaf = path->nodes[0];
6643         fi = btrfs_item_ptr(leaf, path->slots[0],
6644                             struct btrfs_file_extent_item);
6645
6646         if (btrfs_file_extent_compression(leaf, fi) &&
6647             dback->disk_bytenr != entry->bytenr) {
6648                 fprintf(stderr, "Ref doesn't match the record start and is "
6649                         "compressed, please take a btrfs-image of this file "
6650                         "system and send it to a btrfs developer so they can "
6651                         "complete this functionality for bytenr %Lu\n",
6652                         dback->disk_bytenr);
6653                 ret = -EINVAL;
6654                 goto out;
6655         }
6656
6657         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6658                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6659         } else if (dback->disk_bytenr > entry->bytenr) {
6660                 u64 off_diff, offset;
6661
6662                 off_diff = dback->disk_bytenr - entry->bytenr;
6663                 offset = btrfs_file_extent_offset(leaf, fi);
6664                 if (dback->disk_bytenr + offset +
6665                     btrfs_file_extent_num_bytes(leaf, fi) >
6666                     entry->bytenr + entry->bytes) {
6667                         fprintf(stderr, "Ref is past the entry end, please "
6668                                 "take a btrfs-image of this file system and "
6669                                 "send it to a btrfs developer, ref %Lu\n",
6670                                 dback->disk_bytenr);
6671                         ret = -EINVAL;
6672                         goto out;
6673                 }
6674                 offset += off_diff;
6675                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6676                 btrfs_set_file_extent_offset(leaf, fi, offset);
6677         } else if (dback->disk_bytenr < entry->bytenr) {
6678                 u64 offset;
6679
6680                 offset = btrfs_file_extent_offset(leaf, fi);
6681                 if (dback->disk_bytenr + offset < entry->bytenr) {
6682                         fprintf(stderr, "Ref is before the entry start, please"
6683                                 " take a btrfs-image of this file system and "
6684                                 "send it to a btrfs developer, ref %Lu\n",
6685                                 dback->disk_bytenr);
6686                         ret = -EINVAL;
6687                         goto out;
6688                 }
6689
6690                 offset += dback->disk_bytenr;
6691                 offset -= entry->bytenr;
6692                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6693                 btrfs_set_file_extent_offset(leaf, fi, offset);
6694         }
6695
6696         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6697
6698         /*
6699          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6700          * only do this if we aren't using compression, otherwise it's a
6701          * trickier case.
6702          */
6703         if (!btrfs_file_extent_compression(leaf, fi))
6704                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6705         else
6706                 printf("ram bytes may be wrong?\n");
6707         btrfs_mark_buffer_dirty(leaf);
6708 out:
6709         err = btrfs_commit_transaction(trans, root);
6710         btrfs_release_path(path);
6711         return ret ? ret : err;
6712 }
6713
6714 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6715                            struct extent_record *rec)
6716 {
6717         struct extent_backref *back;
6718         struct data_backref *dback;
6719         struct extent_entry *entry, *best = NULL;
6720         LIST_HEAD(entries);
6721         int nr_entries = 0;
6722         int broken_entries = 0;
6723         int ret = 0;
6724         short mismatch = 0;
6725
6726         /*
6727          * Metadata is easy and the backrefs should always agree on bytenr and
6728          * size, if not we've got bigger issues.
6729          */
6730         if (rec->metadata)
6731                 return 0;
6732
6733         list_for_each_entry(back, &rec->backrefs, list) {
6734                 if (back->full_backref || !back->is_data)
6735                         continue;
6736
6737                 dback = (struct data_backref *)back;
6738
6739                 /*
6740                  * We only pay attention to backrefs that we found a real
6741                  * backref for.
6742                  */
6743                 if (dback->found_ref == 0)
6744                         continue;
6745
6746                 /*
6747                  * For now we only catch when the bytes don't match, not the
6748                  * bytenr.  We can easily do this at the same time, but I want
6749                  * to have a fs image to test on before we just add repair
6750                  * functionality willy-nilly so we know we won't screw up the
6751                  * repair.
6752                  */
6753
6754                 entry = find_entry(&entries, dback->disk_bytenr,
6755                                    dback->bytes);
6756                 if (!entry) {
6757                         entry = malloc(sizeof(struct extent_entry));
6758                         if (!entry) {
6759                                 ret = -ENOMEM;
6760                                 goto out;
6761                         }
6762                         memset(entry, 0, sizeof(*entry));
6763                         entry->bytenr = dback->disk_bytenr;
6764                         entry->bytes = dback->bytes;
6765                         list_add_tail(&entry->list, &entries);
6766                         nr_entries++;
6767                 }
6768
6769                 /*
6770                  * If we only have one entry we may think the entries agree when
6771                  * in reality they don't so we have to do some extra checking.
6772                  */
6773                 if (dback->disk_bytenr != rec->start ||
6774                     dback->bytes != rec->nr || back->broken)
6775                         mismatch = 1;
6776
6777                 if (back->broken) {
6778                         entry->broken++;
6779                         broken_entries++;
6780                 }
6781
6782                 entry->count++;
6783         }
6784
6785         /* Yay all the backrefs agree, carry on good sir */
6786         if (nr_entries <= 1 && !mismatch)
6787                 goto out;
6788
6789         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6790                 "%Lu\n", rec->start);
6791
6792         /*
6793          * First we want to see if the backrefs can agree amongst themselves who
6794          * is right, so figure out which one of the entries has the highest
6795          * count.
6796          */
6797         best = find_most_right_entry(&entries);
6798
6799         /*
6800          * Ok so we may have an even split between what the backrefs think, so
6801          * this is where we use the extent ref to see what it thinks.
6802          */
6803         if (!best) {
6804                 entry = find_entry(&entries, rec->start, rec->nr);
6805                 if (!entry && (!broken_entries || !rec->found_rec)) {
6806                         fprintf(stderr, "Backrefs don't agree with each other "
6807                                 "and extent record doesn't agree with anybody,"
6808                                 " so we can't fix bytenr %Lu bytes %Lu\n",
6809                                 rec->start, rec->nr);
6810                         ret = -EINVAL;
6811                         goto out;
6812                 } else if (!entry) {
6813                         /*
6814                          * Ok our backrefs were broken, we'll assume this is the
6815                          * correct value and add an entry for this range.
6816                          */
6817                         entry = malloc(sizeof(struct extent_entry));
6818                         if (!entry) {
6819                                 ret = -ENOMEM;
6820                                 goto out;
6821                         }
6822                         memset(entry, 0, sizeof(*entry));
6823                         entry->bytenr = rec->start;
6824                         entry->bytes = rec->nr;
6825                         list_add_tail(&entry->list, &entries);
6826                         nr_entries++;
6827                 }
6828                 entry->count++;
6829                 best = find_most_right_entry(&entries);
6830                 if (!best) {
6831                         fprintf(stderr, "Backrefs and extent record evenly "
6832                                 "split on who is right, this is going to "
6833                                 "require user input to fix bytenr %Lu bytes "
6834                                 "%Lu\n", rec->start, rec->nr);
6835                         ret = -EINVAL;
6836                         goto out;
6837                 }
6838         }
6839
6840         /*
6841          * I don't think this can happen currently as we'll abort() if we catch
6842          * this case higher up, but in case somebody removes that we still can't
6843          * deal with it properly here yet, so just bail out if that's the case.
6844          */
6845         if (best->bytenr != rec->start) {
6846                 fprintf(stderr, "Extent start and backref starts don't match, "
6847                         "please use btrfs-image on this file system and send "
6848                         "it to a btrfs developer so they can make fsck fix "
6849                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
6850                         rec->start, rec->nr);
6851                 ret = -EINVAL;
6852                 goto out;
6853         }
6854
6855         /*
6856          * Ok great we all agreed on an extent record, let's go find the real
6857          * references and fix up the ones that don't match.
6858          */
6859         list_for_each_entry(back, &rec->backrefs, list) {
6860                 if (back->full_backref || !back->is_data)
6861                         continue;
6862
6863                 dback = (struct data_backref *)back;
6864
6865                 /*
6866                  * Still ignoring backrefs that don't have a real ref attached
6867                  * to them.
6868                  */
6869                 if (dback->found_ref == 0)
6870                         continue;
6871
6872                 if (dback->bytes == best->bytes &&
6873                     dback->disk_bytenr == best->bytenr)
6874                         continue;
6875
6876                 ret = repair_ref(info, path, dback, best);
6877                 if (ret)
6878                         goto out;
6879         }
6880
6881         /*
6882          * Ok we messed with the actual refs, which means we need to drop our
6883          * entire cache and go back and rescan.  I know this is a huge pain and
6884          * adds a lot of extra work, but it's the only way to be safe.  Once all
6885          * the backrefs agree we may not need to do anything to the extent
6886          * record itself.
6887          */
6888         ret = -EAGAIN;
6889 out:
6890         while (!list_empty(&entries)) {
6891                 entry = list_entry(entries.next, struct extent_entry, list);
6892                 list_del_init(&entry->list);
6893                 free(entry);
6894         }
6895         return ret;
6896 }
6897
6898 static int process_duplicates(struct btrfs_root *root,
6899                               struct cache_tree *extent_cache,
6900                               struct extent_record *rec)
6901 {
6902         struct extent_record *good, *tmp;
6903         struct cache_extent *cache;
6904         int ret;
6905
6906         /*
6907          * If we found a extent record for this extent then return, or if we
6908          * have more than one duplicate we are likely going to need to delete
6909          * something.
6910          */
6911         if (rec->found_rec || rec->num_duplicates > 1)
6912                 return 0;
6913
6914         /* Shouldn't happen but just in case */
6915         BUG_ON(!rec->num_duplicates);
6916
6917         /*
6918          * So this happens if we end up with a backref that doesn't match the
6919          * actual extent entry.  So either the backref is bad or the extent
6920          * entry is bad.  Either way we want to have the extent_record actually
6921          * reflect what we found in the extent_tree, so we need to take the
6922          * duplicate out and use that as the extent_record since the only way we
6923          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6924          */
6925         remove_cache_extent(extent_cache, &rec->cache);
6926
6927         good = list_entry(rec->dups.next, struct extent_record, list);
6928         list_del_init(&good->list);
6929         INIT_LIST_HEAD(&good->backrefs);
6930         INIT_LIST_HEAD(&good->dups);
6931         good->cache.start = good->start;
6932         good->cache.size = good->nr;
6933         good->content_checked = 0;
6934         good->owner_ref_checked = 0;
6935         good->num_duplicates = 0;
6936         good->refs = rec->refs;
6937         list_splice_init(&rec->backrefs, &good->backrefs);
6938         while (1) {
6939                 cache = lookup_cache_extent(extent_cache, good->start,
6940                                             good->nr);
6941                 if (!cache)
6942                         break;
6943                 tmp = container_of(cache, struct extent_record, cache);
6944
6945                 /*
6946                  * If we find another overlapping extent and it's found_rec is
6947                  * set then it's a duplicate and we need to try and delete
6948                  * something.
6949                  */
6950                 if (tmp->found_rec || tmp->num_duplicates > 0) {
6951                         if (list_empty(&good->list))
6952                                 list_add_tail(&good->list,
6953                                               &duplicate_extents);
6954                         good->num_duplicates += tmp->num_duplicates + 1;
6955                         list_splice_init(&tmp->dups, &good->dups);
6956                         list_del_init(&tmp->list);
6957                         list_add_tail(&tmp->list, &good->dups);
6958                         remove_cache_extent(extent_cache, &tmp->cache);
6959                         continue;
6960                 }
6961
6962                 /*
6963                  * Ok we have another non extent item backed extent rec, so lets
6964                  * just add it to this extent and carry on like we did above.
6965                  */
6966                 good->refs += tmp->refs;
6967                 list_splice_init(&tmp->backrefs, &good->backrefs);
6968                 remove_cache_extent(extent_cache, &tmp->cache);
6969                 free(tmp);
6970         }
6971         ret = insert_cache_extent(extent_cache, &good->cache);
6972         BUG_ON(ret);
6973         free(rec);
6974         return good->num_duplicates ? 0 : 1;
6975 }
6976
6977 static int delete_duplicate_records(struct btrfs_root *root,
6978                                     struct extent_record *rec)
6979 {
6980         struct btrfs_trans_handle *trans;
6981         LIST_HEAD(delete_list);
6982         struct btrfs_path *path;
6983         struct extent_record *tmp, *good, *n;
6984         int nr_del = 0;
6985         int ret = 0, err;
6986         struct btrfs_key key;
6987
6988         path = btrfs_alloc_path();
6989         if (!path) {
6990                 ret = -ENOMEM;
6991                 goto out;
6992         }
6993
6994         good = rec;
6995         /* Find the record that covers all of the duplicates. */
6996         list_for_each_entry(tmp, &rec->dups, list) {
6997                 if (good->start < tmp->start)
6998                         continue;
6999                 if (good->nr > tmp->nr)
7000                         continue;
7001
7002                 if (tmp->start + tmp->nr < good->start + good->nr) {
7003                         fprintf(stderr, "Ok we have overlapping extents that "
7004                                 "aren't completely covered by eachother, this "
7005                                 "is going to require more careful thought.  "
7006                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7007                                 tmp->start, tmp->nr, good->start, good->nr);
7008                         abort();
7009                 }
7010                 good = tmp;
7011         }
7012
7013         if (good != rec)
7014                 list_add_tail(&rec->list, &delete_list);
7015
7016         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7017                 if (tmp == good)
7018                         continue;
7019                 list_move_tail(&tmp->list, &delete_list);
7020         }
7021
7022         root = root->fs_info->extent_root;
7023         trans = btrfs_start_transaction(root, 1);
7024         if (IS_ERR(trans)) {
7025                 ret = PTR_ERR(trans);
7026                 goto out;
7027         }
7028
7029         list_for_each_entry(tmp, &delete_list, list) {
7030                 if (tmp->found_rec == 0)
7031                         continue;
7032                 key.objectid = tmp->start;
7033                 key.type = BTRFS_EXTENT_ITEM_KEY;
7034                 key.offset = tmp->nr;
7035
7036                 /* Shouldn't happen but just in case */
7037                 if (tmp->metadata) {
7038                         fprintf(stderr, "Well this shouldn't happen, extent "
7039                                 "record overlaps but is metadata? "
7040                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7041                         abort();
7042                 }
7043
7044                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7045                 if (ret) {
7046                         if (ret > 0)
7047                                 ret = -EINVAL;
7048                         break;
7049                 }
7050                 ret = btrfs_del_item(trans, root, path);
7051                 if (ret)
7052                         break;
7053                 btrfs_release_path(path);
7054                 nr_del++;
7055         }
7056         err = btrfs_commit_transaction(trans, root);
7057         if (err && !ret)
7058                 ret = err;
7059 out:
7060         while (!list_empty(&delete_list)) {
7061                 tmp = list_entry(delete_list.next, struct extent_record, list);
7062                 list_del_init(&tmp->list);
7063                 if (tmp == rec)
7064                         continue;
7065                 free(tmp);
7066         }
7067
7068         while (!list_empty(&rec->dups)) {
7069                 tmp = list_entry(rec->dups.next, struct extent_record, list);
7070                 list_del_init(&tmp->list);
7071                 free(tmp);
7072         }
7073
7074         btrfs_free_path(path);
7075
7076         if (!ret && !nr_del)
7077                 rec->num_duplicates = 0;
7078
7079         return ret ? ret : nr_del;
7080 }
7081
7082 static int find_possible_backrefs(struct btrfs_fs_info *info,
7083                                   struct btrfs_path *path,
7084                                   struct cache_tree *extent_cache,
7085                                   struct extent_record *rec)
7086 {
7087         struct btrfs_root *root;
7088         struct extent_backref *back;
7089         struct data_backref *dback;
7090         struct cache_extent *cache;
7091         struct btrfs_file_extent_item *fi;
7092         struct btrfs_key key;
7093         u64 bytenr, bytes;
7094         int ret;
7095
7096         list_for_each_entry(back, &rec->backrefs, list) {
7097                 /* Don't care about full backrefs (poor unloved backrefs) */
7098                 if (back->full_backref || !back->is_data)
7099                         continue;
7100
7101                 dback = (struct data_backref *)back;
7102
7103                 /* We found this one, we don't need to do a lookup */
7104                 if (dback->found_ref)
7105                         continue;
7106
7107                 key.objectid = dback->root;
7108                 key.type = BTRFS_ROOT_ITEM_KEY;
7109                 key.offset = (u64)-1;
7110
7111                 root = btrfs_read_fs_root(info, &key);
7112
7113                 /* No root, definitely a bad ref, skip */
7114                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7115                         continue;
7116                 /* Other err, exit */
7117                 if (IS_ERR(root))
7118                         return PTR_ERR(root);
7119
7120                 key.objectid = dback->owner;
7121                 key.type = BTRFS_EXTENT_DATA_KEY;
7122                 key.offset = dback->offset;
7123                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7124                 if (ret) {
7125                         btrfs_release_path(path);
7126                         if (ret < 0)
7127                                 return ret;
7128                         /* Didn't find it, we can carry on */
7129                         ret = 0;
7130                         continue;
7131                 }
7132
7133                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7134                                     struct btrfs_file_extent_item);
7135                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7136                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7137                 btrfs_release_path(path);
7138                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7139                 if (cache) {
7140                         struct extent_record *tmp;
7141                         tmp = container_of(cache, struct extent_record, cache);
7142
7143                         /*
7144                          * If we found an extent record for the bytenr for this
7145                          * particular backref then we can't add it to our
7146                          * current extent record.  We only want to add backrefs
7147                          * that don't have a corresponding extent item in the
7148                          * extent tree since they likely belong to this record
7149                          * and we need to fix it if it doesn't match bytenrs.
7150                          */
7151                         if  (tmp->found_rec)
7152                                 continue;
7153                 }
7154
7155                 dback->found_ref += 1;
7156                 dback->disk_bytenr = bytenr;
7157                 dback->bytes = bytes;
7158
7159                 /*
7160                  * Set this so the verify backref code knows not to trust the
7161                  * values in this backref.
7162                  */
7163                 back->broken = 1;
7164         }
7165
7166         return 0;
7167 }
7168
7169 /*
7170  * Record orphan data ref into corresponding root.
7171  *
7172  * Return 0 if the extent item contains data ref and recorded.
7173  * Return 1 if the extent item contains no useful data ref
7174  *   On that case, it may contains only shared_dataref or metadata backref
7175  *   or the file extent exists(this should be handled by the extent bytenr
7176  *   recovery routine)
7177  * Return <0 if something goes wrong.
7178  */
7179 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7180                                       struct extent_record *rec)
7181 {
7182         struct btrfs_key key;
7183         struct btrfs_root *dest_root;
7184         struct extent_backref *back;
7185         struct data_backref *dback;
7186         struct orphan_data_extent *orphan;
7187         struct btrfs_path *path;
7188         int recorded_data_ref = 0;
7189         int ret = 0;
7190
7191         if (rec->metadata)
7192                 return 1;
7193         path = btrfs_alloc_path();
7194         if (!path)
7195                 return -ENOMEM;
7196         list_for_each_entry(back, &rec->backrefs, list) {
7197                 if (back->full_backref || !back->is_data ||
7198                     !back->found_extent_tree)
7199                         continue;
7200                 dback = (struct data_backref *)back;
7201                 if (dback->found_ref)
7202                         continue;
7203                 key.objectid = dback->root;
7204                 key.type = BTRFS_ROOT_ITEM_KEY;
7205                 key.offset = (u64)-1;
7206
7207                 dest_root = btrfs_read_fs_root(fs_info, &key);
7208
7209                 /* For non-exist root we just skip it */
7210                 if (IS_ERR(dest_root) || !dest_root)
7211                         continue;
7212
7213                 key.objectid = dback->owner;
7214                 key.type = BTRFS_EXTENT_DATA_KEY;
7215                 key.offset = dback->offset;
7216
7217                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7218                 /*
7219                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7220                  * we need to record it for inode/file extent rebuild.
7221                  * For ret > 0, we record it only for file extent rebuild.
7222                  * For ret == 0, the file extent exists but only bytenr
7223                  * mismatch, let the original bytenr fix routine to handle,
7224                  * don't record it.
7225                  */
7226                 if (ret == 0)
7227                         continue;
7228                 ret = 0;
7229                 orphan = malloc(sizeof(*orphan));
7230                 if (!orphan) {
7231                         ret = -ENOMEM;
7232                         goto out;
7233                 }
7234                 INIT_LIST_HEAD(&orphan->list);
7235                 orphan->root = dback->root;
7236                 orphan->objectid = dback->owner;
7237                 orphan->offset = dback->offset;
7238                 orphan->disk_bytenr = rec->cache.start;
7239                 orphan->disk_len = rec->cache.size;
7240                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7241                 recorded_data_ref = 1;
7242         }
7243 out:
7244         btrfs_free_path(path);
7245         if (!ret)
7246                 return !recorded_data_ref;
7247         else
7248                 return ret;
7249 }
7250
7251 /*
7252  * when an incorrect extent item is found, this will delete
7253  * all of the existing entries for it and recreate them
7254  * based on what the tree scan found.
7255  */
7256 static int fixup_extent_refs(struct btrfs_fs_info *info,
7257                              struct cache_tree *extent_cache,
7258                              struct extent_record *rec)
7259 {
7260         struct btrfs_trans_handle *trans = NULL;
7261         int ret;
7262         struct btrfs_path *path;
7263         struct list_head *cur = rec->backrefs.next;
7264         struct cache_extent *cache;
7265         struct extent_backref *back;
7266         int allocated = 0;
7267         u64 flags = 0;
7268
7269         if (rec->flag_block_full_backref)
7270                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7271
7272         path = btrfs_alloc_path();
7273         if (!path)
7274                 return -ENOMEM;
7275
7276         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7277                 /*
7278                  * Sometimes the backrefs themselves are so broken they don't
7279                  * get attached to any meaningful rec, so first go back and
7280                  * check any of our backrefs that we couldn't find and throw
7281                  * them into the list if we find the backref so that
7282                  * verify_backrefs can figure out what to do.
7283                  */
7284                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7285                 if (ret < 0)
7286                         goto out;
7287         }
7288
7289         /* step one, make sure all of the backrefs agree */
7290         ret = verify_backrefs(info, path, rec);
7291         if (ret < 0)
7292                 goto out;
7293
7294         trans = btrfs_start_transaction(info->extent_root, 1);
7295         if (IS_ERR(trans)) {
7296                 ret = PTR_ERR(trans);
7297                 goto out;
7298         }
7299
7300         /* step two, delete all the existing records */
7301         ret = delete_extent_records(trans, info->extent_root, path,
7302                                     rec->start, rec->max_size);
7303
7304         if (ret < 0)
7305                 goto out;
7306
7307         /* was this block corrupt?  If so, don't add references to it */
7308         cache = lookup_cache_extent(info->corrupt_blocks,
7309                                     rec->start, rec->max_size);
7310         if (cache) {
7311                 ret = 0;
7312                 goto out;
7313         }
7314
7315         /* step three, recreate all the refs we did find */
7316         while(cur != &rec->backrefs) {
7317                 back = list_entry(cur, struct extent_backref, list);
7318                 cur = cur->next;
7319
7320                 /*
7321                  * if we didn't find any references, don't create a
7322                  * new extent record
7323                  */
7324                 if (!back->found_ref)
7325                         continue;
7326
7327                 rec->bad_full_backref = 0;
7328                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7329                 allocated = 1;
7330
7331                 if (ret)
7332                         goto out;
7333         }
7334 out:
7335         if (trans) {
7336                 int err = btrfs_commit_transaction(trans, info->extent_root);
7337                 if (!ret)
7338                         ret = err;
7339         }
7340
7341         btrfs_free_path(path);
7342         return ret;
7343 }
7344
7345 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7346                               struct extent_record *rec)
7347 {
7348         struct btrfs_trans_handle *trans;
7349         struct btrfs_root *root = fs_info->extent_root;
7350         struct btrfs_path *path;
7351         struct btrfs_extent_item *ei;
7352         struct btrfs_key key;
7353         u64 flags;
7354         int ret = 0;
7355
7356         key.objectid = rec->start;
7357         if (rec->metadata) {
7358                 key.type = BTRFS_METADATA_ITEM_KEY;
7359                 key.offset = rec->info_level;
7360         } else {
7361                 key.type = BTRFS_EXTENT_ITEM_KEY;
7362                 key.offset = rec->max_size;
7363         }
7364
7365         path = btrfs_alloc_path();
7366         if (!path)
7367                 return -ENOMEM;
7368
7369         trans = btrfs_start_transaction(root, 0);
7370         if (IS_ERR(trans)) {
7371                 btrfs_free_path(path);
7372                 return PTR_ERR(trans);
7373         }
7374
7375         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7376         if (ret < 0) {
7377                 btrfs_free_path(path);
7378                 btrfs_commit_transaction(trans, root);
7379                 return ret;
7380         } else if (ret) {
7381                 fprintf(stderr, "Didn't find extent for %llu\n",
7382                         (unsigned long long)rec->start);
7383                 btrfs_free_path(path);
7384                 btrfs_commit_transaction(trans, root);
7385                 return -ENOENT;
7386         }
7387
7388         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7389                             struct btrfs_extent_item);
7390         flags = btrfs_extent_flags(path->nodes[0], ei);
7391         if (rec->flag_block_full_backref) {
7392                 fprintf(stderr, "setting full backref on %llu\n",
7393                         (unsigned long long)key.objectid);
7394                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7395         } else {
7396                 fprintf(stderr, "clearing full backref on %llu\n",
7397                         (unsigned long long)key.objectid);
7398                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7399         }
7400         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7401         btrfs_mark_buffer_dirty(path->nodes[0]);
7402         btrfs_free_path(path);
7403         return btrfs_commit_transaction(trans, root);
7404 }
7405
7406 /* right now we only prune from the extent allocation tree */
7407 static int prune_one_block(struct btrfs_trans_handle *trans,
7408                            struct btrfs_fs_info *info,
7409                            struct btrfs_corrupt_block *corrupt)
7410 {
7411         int ret;
7412         struct btrfs_path path;
7413         struct extent_buffer *eb;
7414         u64 found;
7415         int slot;
7416         int nritems;
7417         int level = corrupt->level + 1;
7418
7419         btrfs_init_path(&path);
7420 again:
7421         /* we want to stop at the parent to our busted block */
7422         path.lowest_level = level;
7423
7424         ret = btrfs_search_slot(trans, info->extent_root,
7425                                 &corrupt->key, &path, -1, 1);
7426
7427         if (ret < 0)
7428                 goto out;
7429
7430         eb = path.nodes[level];
7431         if (!eb) {
7432                 ret = -ENOENT;
7433                 goto out;
7434         }
7435
7436         /*
7437          * hopefully the search gave us the block we want to prune,
7438          * lets try that first
7439          */
7440         slot = path.slots[level];
7441         found =  btrfs_node_blockptr(eb, slot);
7442         if (found == corrupt->cache.start)
7443                 goto del_ptr;
7444
7445         nritems = btrfs_header_nritems(eb);
7446
7447         /* the search failed, lets scan this node and hope we find it */
7448         for (slot = 0; slot < nritems; slot++) {
7449                 found =  btrfs_node_blockptr(eb, slot);
7450                 if (found == corrupt->cache.start)
7451                         goto del_ptr;
7452         }
7453         /*
7454          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7455          * to this block
7456          */
7457         if (eb == info->extent_root->node) {
7458                 ret = -ENOENT;
7459                 goto out;
7460         } else {
7461                 level++;
7462                 btrfs_release_path(&path);
7463                 goto again;
7464         }
7465
7466 del_ptr:
7467         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7468         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7469
7470 out:
7471         btrfs_release_path(&path);
7472         return ret;
7473 }
7474
7475 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7476 {
7477         struct btrfs_trans_handle *trans = NULL;
7478         struct cache_extent *cache;
7479         struct btrfs_corrupt_block *corrupt;
7480
7481         while (1) {
7482                 cache = search_cache_extent(info->corrupt_blocks, 0);
7483                 if (!cache)
7484                         break;
7485                 if (!trans) {
7486                         trans = btrfs_start_transaction(info->extent_root, 1);
7487                         if (IS_ERR(trans))
7488                                 return PTR_ERR(trans);
7489                 }
7490                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7491                 prune_one_block(trans, info, corrupt);
7492                 remove_cache_extent(info->corrupt_blocks, cache);
7493         }
7494         if (trans)
7495                 return btrfs_commit_transaction(trans, info->extent_root);
7496         return 0;
7497 }
7498
7499 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7500 {
7501         struct btrfs_block_group_cache *cache;
7502         u64 start, end;
7503         int ret;
7504
7505         while (1) {
7506                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7507                                             &start, &end, EXTENT_DIRTY);
7508                 if (ret)
7509                         break;
7510                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7511                                    GFP_NOFS);
7512         }
7513
7514         start = 0;
7515         while (1) {
7516                 cache = btrfs_lookup_first_block_group(fs_info, start);
7517                 if (!cache)
7518                         break;
7519                 if (cache->cached)
7520                         cache->cached = 0;
7521                 start = cache->key.objectid + cache->key.offset;
7522         }
7523 }
7524
7525 static int check_extent_refs(struct btrfs_root *root,
7526                              struct cache_tree *extent_cache)
7527 {
7528         struct extent_record *rec;
7529         struct cache_extent *cache;
7530         int err = 0;
7531         int ret = 0;
7532         int fixed = 0;
7533         int had_dups = 0;
7534         int recorded = 0;
7535
7536         if (repair) {
7537                 /*
7538                  * if we're doing a repair, we have to make sure
7539                  * we don't allocate from the problem extents.
7540                  * In the worst case, this will be all the
7541                  * extents in the FS
7542                  */
7543                 cache = search_cache_extent(extent_cache, 0);
7544                 while(cache) {
7545                         rec = container_of(cache, struct extent_record, cache);
7546                         set_extent_dirty(root->fs_info->excluded_extents,
7547                                          rec->start,
7548                                          rec->start + rec->max_size - 1,
7549                                          GFP_NOFS);
7550                         cache = next_cache_extent(cache);
7551                 }
7552
7553                 /* pin down all the corrupted blocks too */
7554                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7555                 while(cache) {
7556                         set_extent_dirty(root->fs_info->excluded_extents,
7557                                          cache->start,
7558                                          cache->start + cache->size - 1,
7559                                          GFP_NOFS);
7560                         cache = next_cache_extent(cache);
7561                 }
7562                 prune_corrupt_blocks(root->fs_info);
7563                 reset_cached_block_groups(root->fs_info);
7564         }
7565
7566         reset_cached_block_groups(root->fs_info);
7567
7568         /*
7569          * We need to delete any duplicate entries we find first otherwise we
7570          * could mess up the extent tree when we have backrefs that actually
7571          * belong to a different extent item and not the weird duplicate one.
7572          */
7573         while (repair && !list_empty(&duplicate_extents)) {
7574                 rec = list_entry(duplicate_extents.next, struct extent_record,
7575                                  list);
7576                 list_del_init(&rec->list);
7577
7578                 /* Sometimes we can find a backref before we find an actual
7579                  * extent, so we need to process it a little bit to see if there
7580                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7581                  * if this is a backref screwup.  If we need to delete stuff
7582                  * process_duplicates() will return 0, otherwise it will return
7583                  * 1 and we
7584                  */
7585                 if (process_duplicates(root, extent_cache, rec))
7586                         continue;
7587                 ret = delete_duplicate_records(root, rec);
7588                 if (ret < 0)
7589                         return ret;
7590                 /*
7591                  * delete_duplicate_records will return the number of entries
7592                  * deleted, so if it's greater than 0 then we know we actually
7593                  * did something and we need to remove.
7594                  */
7595                 if (ret)
7596                         had_dups = 1;
7597         }
7598
7599         if (had_dups)
7600                 return -EAGAIN;
7601
7602         while(1) {
7603                 int cur_err = 0;
7604
7605                 fixed = 0;
7606                 recorded = 0;
7607                 cache = search_cache_extent(extent_cache, 0);
7608                 if (!cache)
7609                         break;
7610                 rec = container_of(cache, struct extent_record, cache);
7611                 if (rec->num_duplicates) {
7612                         fprintf(stderr, "extent item %llu has multiple extent "
7613                                 "items\n", (unsigned long long)rec->start);
7614                         err = 1;
7615                         cur_err = 1;
7616                 }
7617
7618                 if (rec->refs != rec->extent_item_refs) {
7619                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7620                                 (unsigned long long)rec->start,
7621                                 (unsigned long long)rec->nr);
7622                         fprintf(stderr, "extent item %llu, found %llu\n",
7623                                 (unsigned long long)rec->extent_item_refs,
7624                                 (unsigned long long)rec->refs);
7625                         ret = record_orphan_data_extents(root->fs_info, rec);
7626                         if (ret < 0)
7627                                 goto repair_abort;
7628                         if (ret == 0) {
7629                                 recorded = 1;
7630                         } else {
7631                                 /*
7632                                  * we can't use the extent to repair file
7633                                  * extent, let the fallback method handle it.
7634                                  */
7635                                 if (!fixed && repair) {
7636                                         ret = fixup_extent_refs(
7637                                                         root->fs_info,
7638                                                         extent_cache, rec);
7639                                         if (ret)
7640                                                 goto repair_abort;
7641                                         fixed = 1;
7642                                 }
7643                         }
7644                         err = 1;
7645                         cur_err = 1;
7646                 }
7647                 if (all_backpointers_checked(rec, 1)) {
7648                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7649                                 (unsigned long long)rec->start,
7650                                 (unsigned long long)rec->nr);
7651
7652                         if (!fixed && !recorded && repair) {
7653                                 ret = fixup_extent_refs(root->fs_info,
7654                                                         extent_cache, rec);
7655                                 if (ret)
7656                                         goto repair_abort;
7657                                 fixed = 1;
7658                         }
7659                         cur_err = 1;
7660                         err = 1;
7661                 }
7662                 if (!rec->owner_ref_checked) {
7663                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7664                                 (unsigned long long)rec->start,
7665                                 (unsigned long long)rec->nr);
7666                         if (!fixed && !recorded && repair) {
7667                                 ret = fixup_extent_refs(root->fs_info,
7668                                                         extent_cache, rec);
7669                                 if (ret)
7670                                         goto repair_abort;
7671                                 fixed = 1;
7672                         }
7673                         err = 1;
7674                         cur_err = 1;
7675                 }
7676                 if (rec->bad_full_backref) {
7677                         fprintf(stderr, "bad full backref, on [%llu]\n",
7678                                 (unsigned long long)rec->start);
7679                         if (repair) {
7680                                 ret = fixup_extent_flags(root->fs_info, rec);
7681                                 if (ret)
7682                                         goto repair_abort;
7683                                 fixed = 1;
7684                         }
7685                         err = 1;
7686                         cur_err = 1;
7687                 }
7688                 /*
7689                  * Although it's not a extent ref's problem, we reuse this
7690                  * routine for error reporting.
7691                  * No repair function yet.
7692                  */
7693                 if (rec->crossing_stripes) {
7694                         fprintf(stderr,
7695                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7696                                 rec->start, rec->start + rec->max_size);
7697                         err = 1;
7698                         cur_err = 1;
7699                 }
7700
7701                 if (rec->wrong_chunk_type) {
7702                         fprintf(stderr,
7703                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7704                                 rec->start, rec->start + rec->max_size);
7705                         err = 1;
7706                         cur_err = 1;
7707                 }
7708
7709                 remove_cache_extent(extent_cache, cache);
7710                 free_all_extent_backrefs(rec);
7711                 if (!init_extent_tree && repair && (!cur_err || fixed))
7712                         clear_extent_dirty(root->fs_info->excluded_extents,
7713                                            rec->start,
7714                                            rec->start + rec->max_size - 1,
7715                                            GFP_NOFS);
7716                 free(rec);
7717         }
7718 repair_abort:
7719         if (repair) {
7720                 if (ret && ret != -EAGAIN) {
7721                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7722                         exit(1);
7723                 } else if (!ret) {
7724                         struct btrfs_trans_handle *trans;
7725
7726                         root = root->fs_info->extent_root;
7727                         trans = btrfs_start_transaction(root, 1);
7728                         if (IS_ERR(trans)) {
7729                                 ret = PTR_ERR(trans);
7730                                 goto repair_abort;
7731                         }
7732
7733                         btrfs_fix_block_accounting(trans, root);
7734                         ret = btrfs_commit_transaction(trans, root);
7735                         if (ret)
7736                                 goto repair_abort;
7737                 }
7738                 if (err)
7739                         fprintf(stderr, "repaired damaged extent references\n");
7740                 return ret;
7741         }
7742         return err;
7743 }
7744
7745 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7746 {
7747         u64 stripe_size;
7748
7749         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7750                 stripe_size = length;
7751                 stripe_size /= num_stripes;
7752         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7753                 stripe_size = length * 2;
7754                 stripe_size /= num_stripes;
7755         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7756                 stripe_size = length;
7757                 stripe_size /= (num_stripes - 1);
7758         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7759                 stripe_size = length;
7760                 stripe_size /= (num_stripes - 2);
7761         } else {
7762                 stripe_size = length;
7763         }
7764         return stripe_size;
7765 }
7766
7767 /*
7768  * Check the chunk with its block group/dev list ref:
7769  * Return 0 if all refs seems valid.
7770  * Return 1 if part of refs seems valid, need later check for rebuild ref
7771  * like missing block group and needs to search extent tree to rebuild them.
7772  * Return -1 if essential refs are missing and unable to rebuild.
7773  */
7774 static int check_chunk_refs(struct chunk_record *chunk_rec,
7775                             struct block_group_tree *block_group_cache,
7776                             struct device_extent_tree *dev_extent_cache,
7777                             int silent)
7778 {
7779         struct cache_extent *block_group_item;
7780         struct block_group_record *block_group_rec;
7781         struct cache_extent *dev_extent_item;
7782         struct device_extent_record *dev_extent_rec;
7783         u64 devid;
7784         u64 offset;
7785         u64 length;
7786         int metadump_v2 = 0;
7787         int i;
7788         int ret = 0;
7789
7790         block_group_item = lookup_cache_extent(&block_group_cache->tree,
7791                                                chunk_rec->offset,
7792                                                chunk_rec->length);
7793         if (block_group_item) {
7794                 block_group_rec = container_of(block_group_item,
7795                                                struct block_group_record,
7796                                                cache);
7797                 if (chunk_rec->length != block_group_rec->offset ||
7798                     chunk_rec->offset != block_group_rec->objectid ||
7799                     (!metadump_v2 &&
7800                      chunk_rec->type_flags != block_group_rec->flags)) {
7801                         if (!silent)
7802                                 fprintf(stderr,
7803                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7804                                         chunk_rec->objectid,
7805                                         chunk_rec->type,
7806                                         chunk_rec->offset,
7807                                         chunk_rec->length,
7808                                         chunk_rec->offset,
7809                                         chunk_rec->type_flags,
7810                                         block_group_rec->objectid,
7811                                         block_group_rec->type,
7812                                         block_group_rec->offset,
7813                                         block_group_rec->offset,
7814                                         block_group_rec->objectid,
7815                                         block_group_rec->flags);
7816                         ret = -1;
7817                 } else {
7818                         list_del_init(&block_group_rec->list);
7819                         chunk_rec->bg_rec = block_group_rec;
7820                 }
7821         } else {
7822                 if (!silent)
7823                         fprintf(stderr,
7824                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7825                                 chunk_rec->objectid,
7826                                 chunk_rec->type,
7827                                 chunk_rec->offset,
7828                                 chunk_rec->length,
7829                                 chunk_rec->offset,
7830                                 chunk_rec->type_flags);
7831                 ret = 1;
7832         }
7833
7834         if (metadump_v2)
7835                 return ret;
7836
7837         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7838                                     chunk_rec->num_stripes);
7839         for (i = 0; i < chunk_rec->num_stripes; ++i) {
7840                 devid = chunk_rec->stripes[i].devid;
7841                 offset = chunk_rec->stripes[i].offset;
7842                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7843                                                        devid, offset, length);
7844                 if (dev_extent_item) {
7845                         dev_extent_rec = container_of(dev_extent_item,
7846                                                 struct device_extent_record,
7847                                                 cache);
7848                         if (dev_extent_rec->objectid != devid ||
7849                             dev_extent_rec->offset != offset ||
7850                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
7851                             dev_extent_rec->length != length) {
7852                                 if (!silent)
7853                                         fprintf(stderr,
7854                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7855                                                 chunk_rec->objectid,
7856                                                 chunk_rec->type,
7857                                                 chunk_rec->offset,
7858                                                 chunk_rec->stripes[i].devid,
7859                                                 chunk_rec->stripes[i].offset,
7860                                                 dev_extent_rec->objectid,
7861                                                 dev_extent_rec->offset,
7862                                                 dev_extent_rec->length);
7863                                 ret = -1;
7864                         } else {
7865                                 list_move(&dev_extent_rec->chunk_list,
7866                                           &chunk_rec->dextents);
7867                         }
7868                 } else {
7869                         if (!silent)
7870                                 fprintf(stderr,
7871                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7872                                         chunk_rec->objectid,
7873                                         chunk_rec->type,
7874                                         chunk_rec->offset,
7875                                         chunk_rec->stripes[i].devid,
7876                                         chunk_rec->stripes[i].offset);
7877                         ret = -1;
7878                 }
7879         }
7880         return ret;
7881 }
7882
7883 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7884 int check_chunks(struct cache_tree *chunk_cache,
7885                  struct block_group_tree *block_group_cache,
7886                  struct device_extent_tree *dev_extent_cache,
7887                  struct list_head *good, struct list_head *bad,
7888                  struct list_head *rebuild, int silent)
7889 {
7890         struct cache_extent *chunk_item;
7891         struct chunk_record *chunk_rec;
7892         struct block_group_record *bg_rec;
7893         struct device_extent_record *dext_rec;
7894         int err;
7895         int ret = 0;
7896
7897         chunk_item = first_cache_extent(chunk_cache);
7898         while (chunk_item) {
7899                 chunk_rec = container_of(chunk_item, struct chunk_record,
7900                                          cache);
7901                 err = check_chunk_refs(chunk_rec, block_group_cache,
7902                                        dev_extent_cache, silent);
7903                 if (err < 0)
7904                         ret = err;
7905                 if (err == 0 && good)
7906                         list_add_tail(&chunk_rec->list, good);
7907                 if (err > 0 && rebuild)
7908                         list_add_tail(&chunk_rec->list, rebuild);
7909                 if (err < 0 && bad)
7910                         list_add_tail(&chunk_rec->list, bad);
7911                 chunk_item = next_cache_extent(chunk_item);
7912         }
7913
7914         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7915                 if (!silent)
7916                         fprintf(stderr,
7917                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7918                                 bg_rec->objectid,
7919                                 bg_rec->offset,
7920                                 bg_rec->flags);
7921                 if (!ret)
7922                         ret = 1;
7923         }
7924
7925         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7926                             chunk_list) {
7927                 if (!silent)
7928                         fprintf(stderr,
7929                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7930                                 dext_rec->objectid,
7931                                 dext_rec->offset,
7932                                 dext_rec->length);
7933                 if (!ret)
7934                         ret = 1;
7935         }
7936         return ret;
7937 }
7938
7939
7940 static int check_device_used(struct device_record *dev_rec,
7941                              struct device_extent_tree *dext_cache)
7942 {
7943         struct cache_extent *cache;
7944         struct device_extent_record *dev_extent_rec;
7945         u64 total_byte = 0;
7946
7947         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7948         while (cache) {
7949                 dev_extent_rec = container_of(cache,
7950                                               struct device_extent_record,
7951                                               cache);
7952                 if (dev_extent_rec->objectid != dev_rec->devid)
7953                         break;
7954
7955                 list_del_init(&dev_extent_rec->device_list);
7956                 total_byte += dev_extent_rec->length;
7957                 cache = next_cache_extent(cache);
7958         }
7959
7960         if (total_byte != dev_rec->byte_used) {
7961                 fprintf(stderr,
7962                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7963                         total_byte, dev_rec->byte_used, dev_rec->objectid,
7964                         dev_rec->type, dev_rec->offset);
7965                 return -1;
7966         } else {
7967                 return 0;
7968         }
7969 }
7970
7971 /* check btrfs_dev_item -> btrfs_dev_extent */
7972 static int check_devices(struct rb_root *dev_cache,
7973                          struct device_extent_tree *dev_extent_cache)
7974 {
7975         struct rb_node *dev_node;
7976         struct device_record *dev_rec;
7977         struct device_extent_record *dext_rec;
7978         int err;
7979         int ret = 0;
7980
7981         dev_node = rb_first(dev_cache);
7982         while (dev_node) {
7983                 dev_rec = container_of(dev_node, struct device_record, node);
7984                 err = check_device_used(dev_rec, dev_extent_cache);
7985                 if (err)
7986                         ret = err;
7987
7988                 dev_node = rb_next(dev_node);
7989         }
7990         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7991                             device_list) {
7992                 fprintf(stderr,
7993                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7994                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
7995                 if (!ret)
7996                         ret = 1;
7997         }
7998         return ret;
7999 }
8000
8001 static int add_root_item_to_list(struct list_head *head,
8002                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8003                                   u8 level, u8 drop_level,
8004                                   int level_size, struct btrfs_key *drop_key)
8005 {
8006
8007         struct root_item_record *ri_rec;
8008         ri_rec = malloc(sizeof(*ri_rec));
8009         if (!ri_rec)
8010                 return -ENOMEM;
8011         ri_rec->bytenr = bytenr;
8012         ri_rec->objectid = objectid;
8013         ri_rec->level = level;
8014         ri_rec->level_size = level_size;
8015         ri_rec->drop_level = drop_level;
8016         ri_rec->last_snapshot = last_snapshot;
8017         if (drop_key)
8018                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8019         list_add_tail(&ri_rec->list, head);
8020
8021         return 0;
8022 }
8023
8024 static void free_root_item_list(struct list_head *list)
8025 {
8026         struct root_item_record *ri_rec;
8027
8028         while (!list_empty(list)) {
8029                 ri_rec = list_first_entry(list, struct root_item_record,
8030                                           list);
8031                 list_del_init(&ri_rec->list);
8032                 free(ri_rec);
8033         }
8034 }
8035
8036 static int deal_root_from_list(struct list_head *list,
8037                                struct btrfs_root *root,
8038                                struct block_info *bits,
8039                                int bits_nr,
8040                                struct cache_tree *pending,
8041                                struct cache_tree *seen,
8042                                struct cache_tree *reada,
8043                                struct cache_tree *nodes,
8044                                struct cache_tree *extent_cache,
8045                                struct cache_tree *chunk_cache,
8046                                struct rb_root *dev_cache,
8047                                struct block_group_tree *block_group_cache,
8048                                struct device_extent_tree *dev_extent_cache)
8049 {
8050         int ret = 0;
8051         u64 last;
8052
8053         while (!list_empty(list)) {
8054                 struct root_item_record *rec;
8055                 struct extent_buffer *buf;
8056                 rec = list_entry(list->next,
8057                                  struct root_item_record, list);
8058                 last = 0;
8059                 buf = read_tree_block(root->fs_info->tree_root,
8060                                       rec->bytenr, rec->level_size, 0);
8061                 if (!extent_buffer_uptodate(buf)) {
8062                         free_extent_buffer(buf);
8063                         ret = -EIO;
8064                         break;
8065                 }
8066                 add_root_to_pending(buf, extent_cache, pending,
8067                                     seen, nodes, rec->objectid);
8068                 /*
8069                  * To rebuild extent tree, we need deal with snapshot
8070                  * one by one, otherwise we deal with node firstly which
8071                  * can maximize readahead.
8072                  */
8073                 while (1) {
8074                         ret = run_next_block(root, bits, bits_nr, &last,
8075                                              pending, seen, reada, nodes,
8076                                              extent_cache, chunk_cache,
8077                                              dev_cache, block_group_cache,
8078                                              dev_extent_cache, rec);
8079                         if (ret != 0)
8080                                 break;
8081                 }
8082                 free_extent_buffer(buf);
8083                 list_del(&rec->list);
8084                 free(rec);
8085                 if (ret < 0)
8086                         break;
8087         }
8088         while (ret >= 0) {
8089                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8090                                      reada, nodes, extent_cache, chunk_cache,
8091                                      dev_cache, block_group_cache,
8092                                      dev_extent_cache, NULL);
8093                 if (ret != 0) {
8094                         if (ret > 0)
8095                                 ret = 0;
8096                         break;
8097                 }
8098         }
8099         return ret;
8100 }
8101
8102 static int check_chunks_and_extents(struct btrfs_root *root)
8103 {
8104         struct rb_root dev_cache;
8105         struct cache_tree chunk_cache;
8106         struct block_group_tree block_group_cache;
8107         struct device_extent_tree dev_extent_cache;
8108         struct cache_tree extent_cache;
8109         struct cache_tree seen;
8110         struct cache_tree pending;
8111         struct cache_tree reada;
8112         struct cache_tree nodes;
8113         struct extent_io_tree excluded_extents;
8114         struct cache_tree corrupt_blocks;
8115         struct btrfs_path path;
8116         struct btrfs_key key;
8117         struct btrfs_key found_key;
8118         int ret, err = 0;
8119         struct block_info *bits;
8120         int bits_nr;
8121         struct extent_buffer *leaf;
8122         int slot;
8123         struct btrfs_root_item ri;
8124         struct list_head dropping_trees;
8125         struct list_head normal_trees;
8126         struct btrfs_root *root1;
8127         u64 objectid;
8128         u32 level_size;
8129         u8 level;
8130
8131         dev_cache = RB_ROOT;
8132         cache_tree_init(&chunk_cache);
8133         block_group_tree_init(&block_group_cache);
8134         device_extent_tree_init(&dev_extent_cache);
8135
8136         cache_tree_init(&extent_cache);
8137         cache_tree_init(&seen);
8138         cache_tree_init(&pending);
8139         cache_tree_init(&nodes);
8140         cache_tree_init(&reada);
8141         cache_tree_init(&corrupt_blocks);
8142         extent_io_tree_init(&excluded_extents);
8143         INIT_LIST_HEAD(&dropping_trees);
8144         INIT_LIST_HEAD(&normal_trees);
8145
8146         if (repair) {
8147                 root->fs_info->excluded_extents = &excluded_extents;
8148                 root->fs_info->fsck_extent_cache = &extent_cache;
8149                 root->fs_info->free_extent_hook = free_extent_hook;
8150                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8151         }
8152
8153         bits_nr = 1024;
8154         bits = malloc(bits_nr * sizeof(struct block_info));
8155         if (!bits) {
8156                 perror("malloc");
8157                 exit(1);
8158         }
8159
8160         if (ctx.progress_enabled) {
8161                 ctx.tp = TASK_EXTENTS;
8162                 task_start(ctx.info);
8163         }
8164
8165 again:
8166         root1 = root->fs_info->tree_root;
8167         level = btrfs_header_level(root1->node);
8168         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8169                                     root1->node->start, 0, level, 0,
8170                                     btrfs_level_size(root1, level), NULL);
8171         if (ret < 0)
8172                 goto out;
8173         root1 = root->fs_info->chunk_root;
8174         level = btrfs_header_level(root1->node);
8175         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8176                                     root1->node->start, 0, level, 0,
8177                                     btrfs_level_size(root1, level), NULL);
8178         if (ret < 0)
8179                 goto out;
8180         btrfs_init_path(&path);
8181         key.offset = 0;
8182         key.objectid = 0;
8183         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8184         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8185                                         &key, &path, 0, 0);
8186         if (ret < 0)
8187                 goto out;
8188         while(1) {
8189                 leaf = path.nodes[0];
8190                 slot = path.slots[0];
8191                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8192                         ret = btrfs_next_leaf(root, &path);
8193                         if (ret != 0)
8194                                 break;
8195                         leaf = path.nodes[0];
8196                         slot = path.slots[0];
8197                 }
8198                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8199                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8200                         unsigned long offset;
8201                         u64 last_snapshot;
8202
8203                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8204                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8205                         last_snapshot = btrfs_root_last_snapshot(&ri);
8206                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8207                                 level = btrfs_root_level(&ri);
8208                                 level_size = btrfs_level_size(root, level);
8209                                 ret = add_root_item_to_list(&normal_trees,
8210                                                 found_key.objectid,
8211                                                 btrfs_root_bytenr(&ri),
8212                                                 last_snapshot, level,
8213                                                 0, level_size, NULL);
8214                                 if (ret < 0)
8215                                         goto out;
8216                         } else {
8217                                 level = btrfs_root_level(&ri);
8218                                 level_size = btrfs_level_size(root, level);
8219                                 objectid = found_key.objectid;
8220                                 btrfs_disk_key_to_cpu(&found_key,
8221                                                       &ri.drop_progress);
8222                                 ret = add_root_item_to_list(&dropping_trees,
8223                                                 objectid,
8224                                                 btrfs_root_bytenr(&ri),
8225                                                 last_snapshot, level,
8226                                                 ri.drop_level,
8227                                                 level_size, &found_key);
8228                                 if (ret < 0)
8229                                         goto out;
8230                         }
8231                 }
8232                 path.slots[0]++;
8233         }
8234         btrfs_release_path(&path);
8235
8236         /*
8237          * check_block can return -EAGAIN if it fixes something, please keep
8238          * this in mind when dealing with return values from these functions, if
8239          * we get -EAGAIN we want to fall through and restart the loop.
8240          */
8241         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8242                                   &seen, &reada, &nodes, &extent_cache,
8243                                   &chunk_cache, &dev_cache, &block_group_cache,
8244                                   &dev_extent_cache);
8245         if (ret < 0) {
8246                 if (ret == -EAGAIN)
8247                         goto loop;
8248                 goto out;
8249         }
8250         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8251                                   &pending, &seen, &reada, &nodes,
8252                                   &extent_cache, &chunk_cache, &dev_cache,
8253                                   &block_group_cache, &dev_extent_cache);
8254         if (ret < 0) {
8255                 if (ret == -EAGAIN)
8256                         goto loop;
8257                 goto out;
8258         }
8259
8260         ret = check_chunks(&chunk_cache, &block_group_cache,
8261                            &dev_extent_cache, NULL, NULL, NULL, 0);
8262         if (ret) {
8263                 if (ret == -EAGAIN)
8264                         goto loop;
8265                 err = ret;
8266         }
8267
8268         ret = check_extent_refs(root, &extent_cache);
8269         if (ret < 0) {
8270                 if (ret == -EAGAIN)
8271                         goto loop;
8272                 goto out;
8273         }
8274
8275         ret = check_devices(&dev_cache, &dev_extent_cache);
8276         if (ret && err)
8277                 ret = err;
8278
8279 out:
8280         task_stop(ctx.info);
8281         if (repair) {
8282                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8283                 extent_io_tree_cleanup(&excluded_extents);
8284                 root->fs_info->fsck_extent_cache = NULL;
8285                 root->fs_info->free_extent_hook = NULL;
8286                 root->fs_info->corrupt_blocks = NULL;
8287                 root->fs_info->excluded_extents = NULL;
8288         }
8289         free(bits);
8290         free_chunk_cache_tree(&chunk_cache);
8291         free_device_cache_tree(&dev_cache);
8292         free_block_group_tree(&block_group_cache);
8293         free_device_extent_tree(&dev_extent_cache);
8294         free_extent_cache_tree(&seen);
8295         free_extent_cache_tree(&pending);
8296         free_extent_cache_tree(&reada);
8297         free_extent_cache_tree(&nodes);
8298         return ret;
8299 loop:
8300         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8301         free_extent_cache_tree(&seen);
8302         free_extent_cache_tree(&pending);
8303         free_extent_cache_tree(&reada);
8304         free_extent_cache_tree(&nodes);
8305         free_chunk_cache_tree(&chunk_cache);
8306         free_block_group_tree(&block_group_cache);
8307         free_device_cache_tree(&dev_cache);
8308         free_device_extent_tree(&dev_extent_cache);
8309         free_extent_record_cache(root->fs_info, &extent_cache);
8310         free_root_item_list(&normal_trees);
8311         free_root_item_list(&dropping_trees);
8312         extent_io_tree_cleanup(&excluded_extents);
8313         goto again;
8314 }
8315
8316 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8317                            struct btrfs_root *root, int overwrite)
8318 {
8319         struct extent_buffer *c;
8320         struct extent_buffer *old = root->node;
8321         int level;
8322         int ret;
8323         struct btrfs_disk_key disk_key = {0,0,0};
8324
8325         level = 0;
8326
8327         if (overwrite) {
8328                 c = old;
8329                 extent_buffer_get(c);
8330                 goto init;
8331         }
8332         c = btrfs_alloc_free_block(trans, root,
8333                                    btrfs_level_size(root, 0),
8334                                    root->root_key.objectid,
8335                                    &disk_key, level, 0, 0);
8336         if (IS_ERR(c)) {
8337                 c = old;
8338                 extent_buffer_get(c);
8339                 overwrite = 1;
8340         }
8341 init:
8342         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8343         btrfs_set_header_level(c, level);
8344         btrfs_set_header_bytenr(c, c->start);
8345         btrfs_set_header_generation(c, trans->transid);
8346         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8347         btrfs_set_header_owner(c, root->root_key.objectid);
8348
8349         write_extent_buffer(c, root->fs_info->fsid,
8350                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8351
8352         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8353                             btrfs_header_chunk_tree_uuid(c),
8354                             BTRFS_UUID_SIZE);
8355
8356         btrfs_mark_buffer_dirty(c);
8357         /*
8358          * this case can happen in the following case:
8359          *
8360          * 1.overwrite previous root.
8361          *
8362          * 2.reinit reloc data root, this is because we skip pin
8363          * down reloc data tree before which means we can allocate
8364          * same block bytenr here.
8365          */
8366         if (old->start == c->start) {
8367                 btrfs_set_root_generation(&root->root_item,
8368                                           trans->transid);
8369                 root->root_item.level = btrfs_header_level(root->node);
8370                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8371                                         &root->root_key, &root->root_item);
8372                 if (ret) {
8373                         free_extent_buffer(c);
8374                         return ret;
8375                 }
8376         }
8377         free_extent_buffer(old);
8378         root->node = c;
8379         add_root_to_dirty_list(root);
8380         return 0;
8381 }
8382
8383 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8384                                 struct extent_buffer *eb, int tree_root)
8385 {
8386         struct extent_buffer *tmp;
8387         struct btrfs_root_item *ri;
8388         struct btrfs_key key;
8389         u64 bytenr;
8390         u32 leafsize;
8391         int level = btrfs_header_level(eb);
8392         int nritems;
8393         int ret;
8394         int i;
8395
8396         /*
8397          * If we have pinned this block before, don't pin it again.
8398          * This can not only avoid forever loop with broken filesystem
8399          * but also give us some speedups.
8400          */
8401         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8402                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8403                 return 0;
8404
8405         btrfs_pin_extent(fs_info, eb->start, eb->len);
8406
8407         leafsize = btrfs_super_leafsize(fs_info->super_copy);
8408         nritems = btrfs_header_nritems(eb);
8409         for (i = 0; i < nritems; i++) {
8410                 if (level == 0) {
8411                         btrfs_item_key_to_cpu(eb, &key, i);
8412                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8413                                 continue;
8414                         /* Skip the extent root and reloc roots */
8415                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8416                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8417                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8418                                 continue;
8419                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8420                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8421
8422                         /*
8423                          * If at any point we start needing the real root we
8424                          * will have to build a stump root for the root we are
8425                          * in, but for now this doesn't actually use the root so
8426                          * just pass in extent_root.
8427                          */
8428                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8429                                               leafsize, 0);
8430                         if (!extent_buffer_uptodate(tmp)) {
8431                                 fprintf(stderr, "Error reading root block\n");
8432                                 return -EIO;
8433                         }
8434                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8435                         free_extent_buffer(tmp);
8436                         if (ret)
8437                                 return ret;
8438                 } else {
8439                         bytenr = btrfs_node_blockptr(eb, i);
8440
8441                         /* If we aren't the tree root don't read the block */
8442                         if (level == 1 && !tree_root) {
8443                                 btrfs_pin_extent(fs_info, bytenr, leafsize);
8444                                 continue;
8445                         }
8446
8447                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8448                                               leafsize, 0);
8449                         if (!extent_buffer_uptodate(tmp)) {
8450                                 fprintf(stderr, "Error reading tree block\n");
8451                                 return -EIO;
8452                         }
8453                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8454                         free_extent_buffer(tmp);
8455                         if (ret)
8456                                 return ret;
8457                 }
8458         }
8459
8460         return 0;
8461 }
8462
8463 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8464 {
8465         int ret;
8466
8467         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8468         if (ret)
8469                 return ret;
8470
8471         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8472 }
8473
8474 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8475 {
8476         struct btrfs_block_group_cache *cache;
8477         struct btrfs_path *path;
8478         struct extent_buffer *leaf;
8479         struct btrfs_chunk *chunk;
8480         struct btrfs_key key;
8481         int ret;
8482         u64 start;
8483
8484         path = btrfs_alloc_path();
8485         if (!path)
8486                 return -ENOMEM;
8487
8488         key.objectid = 0;
8489         key.type = BTRFS_CHUNK_ITEM_KEY;
8490         key.offset = 0;
8491
8492         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8493         if (ret < 0) {
8494                 btrfs_free_path(path);
8495                 return ret;
8496         }
8497
8498         /*
8499          * We do this in case the block groups were screwed up and had alloc
8500          * bits that aren't actually set on the chunks.  This happens with
8501          * restored images every time and could happen in real life I guess.
8502          */
8503         fs_info->avail_data_alloc_bits = 0;
8504         fs_info->avail_metadata_alloc_bits = 0;
8505         fs_info->avail_system_alloc_bits = 0;
8506
8507         /* First we need to create the in-memory block groups */
8508         while (1) {
8509                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8510                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
8511                         if (ret < 0) {
8512                                 btrfs_free_path(path);
8513                                 return ret;
8514                         }
8515                         if (ret) {
8516                                 ret = 0;
8517                                 break;
8518                         }
8519                 }
8520                 leaf = path->nodes[0];
8521                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8522                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8523                         path->slots[0]++;
8524                         continue;
8525                 }
8526
8527                 chunk = btrfs_item_ptr(leaf, path->slots[0],
8528                                        struct btrfs_chunk);
8529                 btrfs_add_block_group(fs_info, 0,
8530                                       btrfs_chunk_type(leaf, chunk),
8531                                       key.objectid, key.offset,
8532                                       btrfs_chunk_length(leaf, chunk));
8533                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8534                                  key.offset + btrfs_chunk_length(leaf, chunk),
8535                                  GFP_NOFS);
8536                 path->slots[0]++;
8537         }
8538         start = 0;
8539         while (1) {
8540                 cache = btrfs_lookup_first_block_group(fs_info, start);
8541                 if (!cache)
8542                         break;
8543                 cache->cached = 1;
8544                 start = cache->key.objectid + cache->key.offset;
8545         }
8546
8547         btrfs_free_path(path);
8548         return 0;
8549 }
8550
8551 static int reset_balance(struct btrfs_trans_handle *trans,
8552                          struct btrfs_fs_info *fs_info)
8553 {
8554         struct btrfs_root *root = fs_info->tree_root;
8555         struct btrfs_path *path;
8556         struct extent_buffer *leaf;
8557         struct btrfs_key key;
8558         int del_slot, del_nr = 0;
8559         int ret;
8560         int found = 0;
8561
8562         path = btrfs_alloc_path();
8563         if (!path)
8564                 return -ENOMEM;
8565
8566         key.objectid = BTRFS_BALANCE_OBJECTID;
8567         key.type = BTRFS_BALANCE_ITEM_KEY;
8568         key.offset = 0;
8569
8570         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8571         if (ret) {
8572                 if (ret > 0)
8573                         ret = 0;
8574                 if (!ret)
8575                         goto reinit_data_reloc;
8576                 else
8577                         goto out;
8578         }
8579
8580         ret = btrfs_del_item(trans, root, path);
8581         if (ret)
8582                 goto out;
8583         btrfs_release_path(path);
8584
8585         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8586         key.type = BTRFS_ROOT_ITEM_KEY;
8587         key.offset = 0;
8588
8589         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8590         if (ret < 0)
8591                 goto out;
8592         while (1) {
8593                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8594                         if (!found)
8595                                 break;
8596
8597                         if (del_nr) {
8598                                 ret = btrfs_del_items(trans, root, path,
8599                                                       del_slot, del_nr);
8600                                 del_nr = 0;
8601                                 if (ret)
8602                                         goto out;
8603                         }
8604                         key.offset++;
8605                         btrfs_release_path(path);
8606
8607                         found = 0;
8608                         ret = btrfs_search_slot(trans, root, &key, path,
8609                                                 -1, 1);
8610                         if (ret < 0)
8611                                 goto out;
8612                         continue;
8613                 }
8614                 found = 1;
8615                 leaf = path->nodes[0];
8616                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8617                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8618                         break;
8619                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8620                         path->slots[0]++;
8621                         continue;
8622                 }
8623                 if (!del_nr) {
8624                         del_slot = path->slots[0];
8625                         del_nr = 1;
8626                 } else {
8627                         del_nr++;
8628                 }
8629                 path->slots[0]++;
8630         }
8631
8632         if (del_nr) {
8633                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8634                 if (ret)
8635                         goto out;
8636         }
8637         btrfs_release_path(path);
8638
8639 reinit_data_reloc:
8640         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8641         key.type = BTRFS_ROOT_ITEM_KEY;
8642         key.offset = (u64)-1;
8643         root = btrfs_read_fs_root(fs_info, &key);
8644         if (IS_ERR(root)) {
8645                 fprintf(stderr, "Error reading data reloc tree\n");
8646                 ret = PTR_ERR(root);
8647                 goto out;
8648         }
8649         record_root_in_trans(trans, root);
8650         ret = btrfs_fsck_reinit_root(trans, root, 0);
8651         if (ret)
8652                 goto out;
8653         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8654 out:
8655         btrfs_free_path(path);
8656         return ret;
8657 }
8658
8659 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8660                               struct btrfs_fs_info *fs_info)
8661 {
8662         u64 start = 0;
8663         int ret;
8664
8665         /*
8666          * The only reason we don't do this is because right now we're just
8667          * walking the trees we find and pinning down their bytes, we don't look
8668          * at any of the leaves.  In order to do mixed groups we'd have to check
8669          * the leaves of any fs roots and pin down the bytes for any file
8670          * extents we find.  Not hard but why do it if we don't have to?
8671          */
8672         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8673                 fprintf(stderr, "We don't support re-initing the extent tree "
8674                         "for mixed block groups yet, please notify a btrfs "
8675                         "developer you want to do this so they can add this "
8676                         "functionality.\n");
8677                 return -EINVAL;
8678         }
8679
8680         /*
8681          * first we need to walk all of the trees except the extent tree and pin
8682          * down the bytes that are in use so we don't overwrite any existing
8683          * metadata.
8684          */
8685         ret = pin_metadata_blocks(fs_info);
8686         if (ret) {
8687                 fprintf(stderr, "error pinning down used bytes\n");
8688                 return ret;
8689         }
8690
8691         /*
8692          * Need to drop all the block groups since we're going to recreate all
8693          * of them again.
8694          */
8695         btrfs_free_block_groups(fs_info);
8696         ret = reset_block_groups(fs_info);
8697         if (ret) {
8698                 fprintf(stderr, "error resetting the block groups\n");
8699                 return ret;
8700         }
8701
8702         /* Ok we can allocate now, reinit the extent root */
8703         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8704         if (ret) {
8705                 fprintf(stderr, "extent root initialization failed\n");
8706                 /*
8707                  * When the transaction code is updated we should end the
8708                  * transaction, but for now progs only knows about commit so
8709                  * just return an error.
8710                  */
8711                 return ret;
8712         }
8713
8714         /*
8715          * Now we have all the in-memory block groups setup so we can make
8716          * allocations properly, and the metadata we care about is safe since we
8717          * pinned all of it above.
8718          */
8719         while (1) {
8720                 struct btrfs_block_group_cache *cache;
8721
8722                 cache = btrfs_lookup_first_block_group(fs_info, start);
8723                 if (!cache)
8724                         break;
8725                 start = cache->key.objectid + cache->key.offset;
8726                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8727                                         &cache->key, &cache->item,
8728                                         sizeof(cache->item));
8729                 if (ret) {
8730                         fprintf(stderr, "Error adding block group\n");
8731                         return ret;
8732                 }
8733                 btrfs_extent_post_op(trans, fs_info->extent_root);
8734         }
8735
8736         ret = reset_balance(trans, fs_info);
8737         if (ret)
8738                 fprintf(stderr, "error reseting the pending balance\n");
8739
8740         return ret;
8741 }
8742
8743 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8744 {
8745         struct btrfs_path *path;
8746         struct btrfs_trans_handle *trans;
8747         struct btrfs_key key;
8748         int ret;
8749
8750         printf("Recowing metadata block %llu\n", eb->start);
8751         key.objectid = btrfs_header_owner(eb);
8752         key.type = BTRFS_ROOT_ITEM_KEY;
8753         key.offset = (u64)-1;
8754
8755         root = btrfs_read_fs_root(root->fs_info, &key);
8756         if (IS_ERR(root)) {
8757                 fprintf(stderr, "Couldn't find owner root %llu\n",
8758                         key.objectid);
8759                 return PTR_ERR(root);
8760         }
8761
8762         path = btrfs_alloc_path();
8763         if (!path)
8764                 return -ENOMEM;
8765
8766         trans = btrfs_start_transaction(root, 1);
8767         if (IS_ERR(trans)) {
8768                 btrfs_free_path(path);
8769                 return PTR_ERR(trans);
8770         }
8771
8772         path->lowest_level = btrfs_header_level(eb);
8773         if (path->lowest_level)
8774                 btrfs_node_key_to_cpu(eb, &key, 0);
8775         else
8776                 btrfs_item_key_to_cpu(eb, &key, 0);
8777
8778         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8779         btrfs_commit_transaction(trans, root);
8780         btrfs_free_path(path);
8781         return ret;
8782 }
8783
8784 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8785 {
8786         struct btrfs_path *path;
8787         struct btrfs_trans_handle *trans;
8788         struct btrfs_key key;
8789         int ret;
8790
8791         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8792                bad->key.type, bad->key.offset);
8793         key.objectid = bad->root_id;
8794         key.type = BTRFS_ROOT_ITEM_KEY;
8795         key.offset = (u64)-1;
8796
8797         root = btrfs_read_fs_root(root->fs_info, &key);
8798         if (IS_ERR(root)) {
8799                 fprintf(stderr, "Couldn't find owner root %llu\n",
8800                         key.objectid);
8801                 return PTR_ERR(root);
8802         }
8803
8804         path = btrfs_alloc_path();
8805         if (!path)
8806                 return -ENOMEM;
8807
8808         trans = btrfs_start_transaction(root, 1);
8809         if (IS_ERR(trans)) {
8810                 btrfs_free_path(path);
8811                 return PTR_ERR(trans);
8812         }
8813
8814         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8815         if (ret) {
8816                 if (ret > 0)
8817                         ret = 0;
8818                 goto out;
8819         }
8820         ret = btrfs_del_item(trans, root, path);
8821 out:
8822         btrfs_commit_transaction(trans, root);
8823         btrfs_free_path(path);
8824         return ret;
8825 }
8826
8827 static int zero_log_tree(struct btrfs_root *root)
8828 {
8829         struct btrfs_trans_handle *trans;
8830         int ret;
8831
8832         trans = btrfs_start_transaction(root, 1);
8833         if (IS_ERR(trans)) {
8834                 ret = PTR_ERR(trans);
8835                 return ret;
8836         }
8837         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8838         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8839         ret = btrfs_commit_transaction(trans, root);
8840         return ret;
8841 }
8842
8843 static int populate_csum(struct btrfs_trans_handle *trans,
8844                          struct btrfs_root *csum_root, char *buf, u64 start,
8845                          u64 len)
8846 {
8847         u64 offset = 0;
8848         u64 sectorsize;
8849         int ret = 0;
8850
8851         while (offset < len) {
8852                 sectorsize = csum_root->sectorsize;
8853                 ret = read_extent_data(csum_root, buf, start + offset,
8854                                        &sectorsize, 0);
8855                 if (ret)
8856                         break;
8857                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8858                                             start + offset, buf, sectorsize);
8859                 if (ret)
8860                         break;
8861                 offset += sectorsize;
8862         }
8863         return ret;
8864 }
8865
8866 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8867                                       struct btrfs_root *csum_root,
8868                                       struct btrfs_root *cur_root)
8869 {
8870         struct btrfs_path *path;
8871         struct btrfs_key key;
8872         struct extent_buffer *node;
8873         struct btrfs_file_extent_item *fi;
8874         char *buf = NULL;
8875         u64 start = 0;
8876         u64 len = 0;
8877         int slot = 0;
8878         int ret = 0;
8879
8880         path = btrfs_alloc_path();
8881         if (!path)
8882                 return -ENOMEM;
8883         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8884         if (!buf) {
8885                 ret = -ENOMEM;
8886                 goto out;
8887         }
8888
8889         key.objectid = 0;
8890         key.offset = 0;
8891         key.type = 0;
8892
8893         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8894         if (ret < 0)
8895                 goto out;
8896         /* Iterate all regular file extents and fill its csum */
8897         while (1) {
8898                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8899
8900                 if (key.type != BTRFS_EXTENT_DATA_KEY)
8901                         goto next;
8902                 node = path->nodes[0];
8903                 slot = path->slots[0];
8904                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8905                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8906                         goto next;
8907                 start = btrfs_file_extent_disk_bytenr(node, fi);
8908                 len = btrfs_file_extent_disk_num_bytes(node, fi);
8909
8910                 ret = populate_csum(trans, csum_root, buf, start, len);
8911                 if (ret == -EEXIST)
8912                         ret = 0;
8913                 if (ret < 0)
8914                         goto out;
8915 next:
8916                 /*
8917                  * TODO: if next leaf is corrupted, jump to nearest next valid
8918                  * leaf.
8919                  */
8920                 ret = btrfs_next_item(cur_root, path);
8921                 if (ret < 0)
8922                         goto out;
8923                 if (ret > 0) {
8924                         ret = 0;
8925                         goto out;
8926                 }
8927         }
8928
8929 out:
8930         btrfs_free_path(path);
8931         free(buf);
8932         return ret;
8933 }
8934
8935 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8936                                   struct btrfs_root *csum_root)
8937 {
8938         struct btrfs_fs_info *fs_info = csum_root->fs_info;
8939         struct btrfs_path *path;
8940         struct btrfs_root *tree_root = fs_info->tree_root;
8941         struct btrfs_root *cur_root;
8942         struct extent_buffer *node;
8943         struct btrfs_key key;
8944         int slot = 0;
8945         int ret = 0;
8946
8947         path = btrfs_alloc_path();
8948         if (!path)
8949                 return -ENOMEM;
8950
8951         key.objectid = BTRFS_FS_TREE_OBJECTID;
8952         key.offset = 0;
8953         key.type = BTRFS_ROOT_ITEM_KEY;
8954
8955         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
8956         if (ret < 0)
8957                 goto out;
8958         if (ret > 0) {
8959                 ret = -ENOENT;
8960                 goto out;
8961         }
8962
8963         while (1) {
8964                 node = path->nodes[0];
8965                 slot = path->slots[0];
8966                 btrfs_item_key_to_cpu(node, &key, slot);
8967                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8968                         goto out;
8969                 if (key.type != BTRFS_ROOT_ITEM_KEY)
8970                         goto next;
8971                 if (!is_fstree(key.objectid))
8972                         goto next;
8973                 key.offset = (u64)-1;
8974
8975                 cur_root = btrfs_read_fs_root(fs_info, &key);
8976                 if (IS_ERR(cur_root) || !cur_root) {
8977                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8978                                 key.objectid);
8979                         goto out;
8980                 }
8981                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8982                                 cur_root);
8983                 if (ret < 0)
8984                         goto out;
8985 next:
8986                 ret = btrfs_next_item(tree_root, path);
8987                 if (ret > 0) {
8988                         ret = 0;
8989                         goto out;
8990                 }
8991                 if (ret < 0)
8992                         goto out;
8993         }
8994
8995 out:
8996         btrfs_free_path(path);
8997         return ret;
8998 }
8999
9000 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
9001                                       struct btrfs_root *csum_root)
9002 {
9003         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
9004         struct btrfs_path *path;
9005         struct btrfs_extent_item *ei;
9006         struct extent_buffer *leaf;
9007         char *buf;
9008         struct btrfs_key key;
9009         int ret;
9010
9011         path = btrfs_alloc_path();
9012         if (!path)
9013                 return -ENOMEM;
9014
9015         key.objectid = 0;
9016         key.type = BTRFS_EXTENT_ITEM_KEY;
9017         key.offset = 0;
9018
9019         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9020         if (ret < 0) {
9021                 btrfs_free_path(path);
9022                 return ret;
9023         }
9024
9025         buf = malloc(csum_root->sectorsize);
9026         if (!buf) {
9027                 btrfs_free_path(path);
9028                 return -ENOMEM;
9029         }
9030
9031         while (1) {
9032                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9033                         ret = btrfs_next_leaf(extent_root, path);
9034                         if (ret < 0)
9035                                 break;
9036                         if (ret) {
9037                                 ret = 0;
9038                                 break;
9039                         }
9040                 }
9041                 leaf = path->nodes[0];
9042
9043                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9044                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9045                         path->slots[0]++;
9046                         continue;
9047                 }
9048
9049                 ei = btrfs_item_ptr(leaf, path->slots[0],
9050                                     struct btrfs_extent_item);
9051                 if (!(btrfs_extent_flags(leaf, ei) &
9052                       BTRFS_EXTENT_FLAG_DATA)) {
9053                         path->slots[0]++;
9054                         continue;
9055                 }
9056
9057                 ret = populate_csum(trans, csum_root, buf, key.objectid,
9058                                     key.offset);
9059                 if (ret)
9060                         break;
9061                 path->slots[0]++;
9062         }
9063
9064         btrfs_free_path(path);
9065         free(buf);
9066         return ret;
9067 }
9068
/*
 * Recompute the data checksums and insert them into the csum tree.
 *
 * When the extent tree has just been re-initialized it no longer carries any
 * extent info, so it cannot be used as the source of data extents.  In that
 * case the caller sets search_fs_tree and the fs/subvolume trees are walked
 * instead; otherwise the extent tree is scanned.
 *
 * Returns whatever the selected helper returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ?
		fill_csum_tree_from_fs(trans, csum_root) :
		fill_csum_tree_from_extent(trans, csum_root);
}
9085
/*
 * What build_roots_info_cache() learned from the extent tree about one root.
 * Entries live in roots_info_cache, keyed by root id through cache_extent
 * (start == root id, size == 1).
 */
struct root_item_info {
	/* level of the root */
	u8 level;
	/* number of nodes at this level, must be 1 for a root */
	int node_count;
	/* bytenr of the tree block recorded at the level above */
	u64 bytenr;
	/* generation read from that block's extent item */
	u64 gen;
	/* linkage into roots_info_cache */
	struct cache_extent cache_extent;
};
9095
/*
 * Root id -> struct root_item_info lookup, allocated on demand by
 * build_roots_info_cache() and released by free_roots_info_cache().
 */
static struct cache_tree *roots_info_cache = NULL;
9097
9098 static void free_roots_info_cache(void)
9099 {
9100         if (!roots_info_cache)
9101                 return;
9102
9103         while (!cache_tree_empty(roots_info_cache)) {
9104                 struct cache_extent *entry;
9105                 struct root_item_info *rii;
9106
9107                 entry = first_cache_extent(roots_info_cache);
9108                 if (!entry)
9109                         break;
9110                 remove_cache_extent(roots_info_cache, entry);
9111                 rii = container_of(entry, struct root_item_info, cache_extent);
9112                 free(rii);
9113         }
9114
9115         free(roots_info_cache);
9116         roots_info_cache = NULL;
9117 }
9118
9119 static int build_roots_info_cache(struct btrfs_fs_info *info)
9120 {
9121         int ret = 0;
9122         struct btrfs_key key;
9123         struct extent_buffer *leaf;
9124         struct btrfs_path *path;
9125
9126         if (!roots_info_cache) {
9127                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9128                 if (!roots_info_cache)
9129                         return -ENOMEM;
9130                 cache_tree_init(roots_info_cache);
9131         }
9132
9133         path = btrfs_alloc_path();
9134         if (!path)
9135                 return -ENOMEM;
9136
9137         key.objectid = 0;
9138         key.type = BTRFS_EXTENT_ITEM_KEY;
9139         key.offset = 0;
9140
9141         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9142         if (ret < 0)
9143                 goto out;
9144         leaf = path->nodes[0];
9145
9146         while (1) {
9147                 struct btrfs_key found_key;
9148                 struct btrfs_extent_item *ei;
9149                 struct btrfs_extent_inline_ref *iref;
9150                 int slot = path->slots[0];
9151                 int type;
9152                 u64 flags;
9153                 u64 root_id;
9154                 u8 level;
9155                 struct cache_extent *entry;
9156                 struct root_item_info *rii;
9157
9158                 if (slot >= btrfs_header_nritems(leaf)) {
9159                         ret = btrfs_next_leaf(info->extent_root, path);
9160                         if (ret < 0) {
9161                                 break;
9162                         } else if (ret) {
9163                                 ret = 0;
9164                                 break;
9165                         }
9166                         leaf = path->nodes[0];
9167                         slot = path->slots[0];
9168                 }
9169
9170                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9171
9172                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9173                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9174                         goto next;
9175
9176                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9177                 flags = btrfs_extent_flags(leaf, ei);
9178
9179                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9180                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9181                         goto next;
9182
9183                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9184                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9185                         level = found_key.offset;
9186                 } else {
9187                         struct btrfs_tree_block_info *info;
9188
9189                         info = (struct btrfs_tree_block_info *)(ei + 1);
9190                         iref = (struct btrfs_extent_inline_ref *)(info + 1);
9191                         level = btrfs_tree_block_level(leaf, info);
9192                 }
9193
9194                 /*
9195                  * For a root extent, it must be of the following type and the
9196                  * first (and only one) iref in the item.
9197                  */
9198                 type = btrfs_extent_inline_ref_type(leaf, iref);
9199                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9200                         goto next;
9201
9202                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9203                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9204                 if (!entry) {
9205                         rii = malloc(sizeof(struct root_item_info));
9206                         if (!rii) {
9207                                 ret = -ENOMEM;
9208                                 goto out;
9209                         }
9210                         rii->cache_extent.start = root_id;
9211                         rii->cache_extent.size = 1;
9212                         rii->level = (u8)-1;
9213                         entry = &rii->cache_extent;
9214                         ret = insert_cache_extent(roots_info_cache, entry);
9215                         ASSERT(ret == 0);
9216                 } else {
9217                         rii = container_of(entry, struct root_item_info,
9218                                            cache_extent);
9219                 }
9220
9221                 ASSERT(rii->cache_extent.start == root_id);
9222                 ASSERT(rii->cache_extent.size == 1);
9223
9224                 if (level > rii->level || rii->level == (u8)-1) {
9225                         rii->level = level;
9226                         rii->bytenr = found_key.objectid;
9227                         rii->gen = btrfs_extent_generation(leaf, ei);
9228                         rii->node_count = 1;
9229                 } else if (level == rii->level) {
9230                         rii->node_count++;
9231                 }
9232 next:
9233                 path->slots[0]++;
9234         }
9235
9236 out:
9237         btrfs_free_path(path);
9238
9239         return ret;
9240 }
9241
9242 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9243                                   struct btrfs_path *path,
9244                                   const struct btrfs_key *root_key,
9245                                   const int read_only_mode)
9246 {
9247         const u64 root_id = root_key->objectid;
9248         struct cache_extent *entry;
9249         struct root_item_info *rii;
9250         struct btrfs_root_item ri;
9251         unsigned long offset;
9252
9253         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9254         if (!entry) {
9255                 fprintf(stderr,
9256                         "Error: could not find extent items for root %llu\n",
9257                         root_key->objectid);
9258                 return -ENOENT;
9259         }
9260
9261         rii = container_of(entry, struct root_item_info, cache_extent);
9262         ASSERT(rii->cache_extent.start == root_id);
9263         ASSERT(rii->cache_extent.size == 1);
9264
9265         if (rii->node_count != 1) {
9266                 fprintf(stderr,
9267                         "Error: could not find btree root extent for root %llu\n",
9268                         root_id);
9269                 return -ENOENT;
9270         }
9271
9272         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9273         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9274
9275         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9276             btrfs_root_level(&ri) != rii->level ||
9277             btrfs_root_generation(&ri) != rii->gen) {
9278
9279                 /*
9280                  * If we're in repair mode but our caller told us to not update
9281                  * the root item, i.e. just check if it needs to be updated, don't
9282                  * print this message, since the caller will call us again shortly
9283                  * for the same root item without read only mode (the caller will
9284                  * open a transaction first).
9285                  */
9286                 if (!(read_only_mode && repair))
9287                         fprintf(stderr,
9288                                 "%sroot item for root %llu,"
9289                                 " current bytenr %llu, current gen %llu, current level %u,"
9290                                 " new bytenr %llu, new gen %llu, new level %u\n",
9291                                 (read_only_mode ? "" : "fixing "),
9292                                 root_id,
9293                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9294                                 btrfs_root_level(&ri),
9295                                 rii->bytenr, rii->gen, rii->level);
9296
9297                 if (btrfs_root_generation(&ri) > rii->gen) {
9298                         fprintf(stderr,
9299                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9300                                 root_id, btrfs_root_generation(&ri), rii->gen);
9301                         return -EINVAL;
9302                 }
9303
9304                 if (!read_only_mode) {
9305                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9306                         btrfs_set_root_level(&ri, rii->level);
9307                         btrfs_set_root_generation(&ri, rii->gen);
9308                         write_extent_buffer(path->nodes[0], &ri,
9309                                             offset, sizeof(ri));
9310                 }
9311
9312                 return 1;
9313         }
9314
9315         return 0;
9316 }
9317
9318 /*
9319  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9320  * caused read-only snapshots to be corrupted if they were created at a moment
9321  * when the source subvolume/snapshot had orphan items. The issue was that the
9322  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9323  * node instead of the post orphan cleanup root node.
9324  * So this function, and its callees, just detects and fixes those cases. Even
9325  * though the regression was for read-only snapshots, this function applies to
9326  * any snapshot/subvolume root.
9327  * This must be run before any other repair code - not doing it so, makes other
9328  * repair code delete or modify backrefs in the extent tree for example, which
9329  * will result in an inconsistent fs after repairing the root items.
9330  */
9331 static int repair_root_items(struct btrfs_fs_info *info)
9332 {
9333         struct btrfs_path *path = NULL;
9334         struct btrfs_key key;
9335         struct extent_buffer *leaf;
9336         struct btrfs_trans_handle *trans = NULL;
9337         int ret = 0;
9338         int bad_roots = 0;
9339         int need_trans = 0;
9340
9341         ret = build_roots_info_cache(info);
9342         if (ret)
9343                 goto out;
9344
9345         path = btrfs_alloc_path();
9346         if (!path) {
9347                 ret = -ENOMEM;
9348                 goto out;
9349         }
9350
9351         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9352         key.type = BTRFS_ROOT_ITEM_KEY;
9353         key.offset = 0;
9354
9355 again:
9356         /*
9357          * Avoid opening and committing transactions if a leaf doesn't have
9358          * any root items that need to be fixed, so that we avoid rotating
9359          * backup roots unnecessarily.
9360          */
9361         if (need_trans) {
9362                 trans = btrfs_start_transaction(info->tree_root, 1);
9363                 if (IS_ERR(trans)) {
9364                         ret = PTR_ERR(trans);
9365                         goto out;
9366                 }
9367         }
9368
9369         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9370                                 0, trans ? 1 : 0);
9371         if (ret < 0)
9372                 goto out;
9373         leaf = path->nodes[0];
9374
9375         while (1) {
9376                 struct btrfs_key found_key;
9377
9378                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9379                         int no_more_keys = find_next_key(path, &key);
9380
9381                         btrfs_release_path(path);
9382                         if (trans) {
9383                                 ret = btrfs_commit_transaction(trans,
9384                                                                info->tree_root);
9385                                 trans = NULL;
9386                                 if (ret < 0)
9387                                         goto out;
9388                         }
9389                         need_trans = 0;
9390                         if (no_more_keys)
9391                                 break;
9392                         goto again;
9393                 }
9394
9395                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9396
9397                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9398                         goto next;
9399                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9400                         goto next;
9401
9402                 ret = maybe_repair_root_item(info, path, &found_key,
9403                                              trans ? 0 : 1);
9404                 if (ret < 0)
9405                         goto out;
9406                 if (ret) {
9407                         if (!trans && repair) {
9408                                 need_trans = 1;
9409                                 key = found_key;
9410                                 btrfs_release_path(path);
9411                                 goto again;
9412                         }
9413                         bad_roots++;
9414                 }
9415 next:
9416                 path->slots[0]++;
9417         }
9418         ret = 0;
9419 out:
9420         free_roots_info_cache();
9421         btrfs_free_path(path);
9422         if (trans)
9423                 btrfs_commit_transaction(trans, info->tree_root);
9424         if (ret < 0)
9425                 return ret;
9426
9427         return bad_roots;
9428 }
9429
/*
 * Help text for "btrfs check": synopsis, one-line summary, long description,
 * an empty separator string, then one line per option.  NULL-terminated.
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found.",
	"WARNING: the repair mode is considered dangerous",
	"",
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report           print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"-p|--progress               indicate progress",
	NULL
};
9452
9453 int cmd_check(int argc, char **argv)
9454 {
9455         struct cache_tree root_cache;
9456         struct btrfs_root *root;
9457         struct btrfs_fs_info *info;
9458         u64 bytenr = 0;
9459         u64 subvolid = 0;
9460         u64 tree_root_bytenr = 0;
9461         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9462         int ret;
9463         u64 num;
9464         int init_csum_tree = 0;
9465         int readonly = 0;
9466         int qgroup_report = 0;
9467         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9468
9469         while(1) {
9470                 int c;
9471                 enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT,
9472                         OPT_CHECK_CSUM, OPT_READONLY };
9473                 static const struct option long_options[] = {
9474                         { "super", required_argument, NULL, 's' },
9475                         { "repair", no_argument, NULL, OPT_REPAIR },
9476                         { "readonly", no_argument, NULL, OPT_READONLY },
9477                         { "init-csum-tree", no_argument, NULL, OPT_INIT_CSUM },
9478                         { "init-extent-tree", no_argument, NULL, OPT_INIT_EXTENT },
9479                         { "check-data-csum", no_argument, NULL, OPT_CHECK_CSUM },
9480                         { "backup", no_argument, NULL, 'b' },
9481                         { "subvol-extents", required_argument, NULL, 'E' },
9482                         { "qgroup-report", no_argument, NULL, 'Q' },
9483                         { "tree-root", required_argument, NULL, 'r' },
9484                         { "progress", no_argument, NULL, 'p' },
9485                         { NULL, 0, NULL, 0}
9486                 };
9487
9488                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9489                 if (c < 0)
9490                         break;
9491                 switch(c) {
9492                         case 'a': /* ignored */ break;
9493                         case 'b':
9494                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9495                                 break;
9496                         case 's':
9497                                 num = arg_strtou64(optarg);
9498                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9499                                         fprintf(stderr,
9500                                                 "ERROR: super mirror should be less than: %d\n",
9501                                                 BTRFS_SUPER_MIRROR_MAX);
9502                                         exit(1);
9503                                 }
9504                                 bytenr = btrfs_sb_offset(((int)num));
9505                                 printf("using SB copy %llu, bytenr %llu\n", num,
9506                                        (unsigned long long)bytenr);
9507                                 break;
9508                         case 'Q':
9509                                 qgroup_report = 1;
9510                                 break;
9511                         case 'E':
9512                                 subvolid = arg_strtou64(optarg);
9513                                 break;
9514                         case 'r':
9515                                 tree_root_bytenr = arg_strtou64(optarg);
9516                                 break;
9517                         case 'p':
9518                                 ctx.progress_enabled = true;
9519                                 break;
9520                         case '?':
9521                         case 'h':
9522                                 usage(cmd_check_usage);
9523                         case OPT_REPAIR:
9524                                 printf("enabling repair mode\n");
9525                                 repair = 1;
9526                                 ctree_flags |= OPEN_CTREE_WRITES;
9527                                 break;
9528                         case OPT_READONLY:
9529                                 readonly = 1;
9530                                 break;
9531                         case OPT_INIT_CSUM:
9532                                 printf("Creating a new CRC tree\n");
9533                                 init_csum_tree = 1;
9534                                 repair = 1;
9535                                 ctree_flags |= OPEN_CTREE_WRITES;
9536                                 break;
9537                         case OPT_INIT_EXTENT:
9538                                 init_extent_tree = 1;
9539                                 ctree_flags |= (OPEN_CTREE_WRITES |
9540                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9541                                 repair = 1;
9542                                 break;
9543                         case OPT_CHECK_CSUM:
9544                                 check_data_csum = 1;
9545                                 break;
9546                 }
9547         }
9548         argc = argc - optind;
9549
9550         if (check_argc_exact(argc, 1))
9551                 usage(cmd_check_usage);
9552
9553         if (ctx.progress_enabled) {
9554                 ctx.tp = TASK_NOTHING;
9555                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9556         }
9557
9558         /* This check is the only reason for --readonly to exist */
9559         if (readonly && repair) {
9560                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9561                 exit(1);
9562         }
9563
9564         radix_tree_init();
9565         cache_tree_init(&root_cache);
9566
9567         if((ret = check_mounted(argv[optind])) < 0) {
9568                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9569                 goto err_out;
9570         } else if(ret) {
9571                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9572                 ret = -EBUSY;
9573                 goto err_out;
9574         }
9575
9576         /* only allow partial opening under repair mode */
9577         if (repair)
9578                 ctree_flags |= OPEN_CTREE_PARTIAL;
9579
9580         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9581                                   ctree_flags);
9582         if (!info) {
9583                 fprintf(stderr, "Couldn't open file system\n");
9584                 ret = -EIO;
9585                 goto err_out;
9586         }
9587
9588         global_info = info;
9589         root = info->fs_root;
9590
9591         /*
9592          * repair mode will force us to commit transaction which
9593          * will make us fail to load log tree when mounting.
9594          */
9595         if (repair && btrfs_super_log_root(info->super_copy)) {
9596                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9597                 if (!ret) {
9598                         ret = 1;
9599                         goto close_out;
9600                 }
9601                 ret = zero_log_tree(root);
9602                 if (ret) {
9603                         fprintf(stderr, "fail to zero log tree\n");
9604                         goto close_out;
9605                 }
9606         }
9607
9608         uuid_unparse(info->super_copy->fsid, uuidbuf);
9609         if (qgroup_report) {
9610                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9611                        uuidbuf);
9612                 ret = qgroup_verify_all(info);
9613                 if (ret == 0)
9614                         print_qgroup_report(1);
9615                 goto close_out;
9616         }
9617         if (subvolid) {
9618                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9619                        subvolid, argv[optind], uuidbuf);
9620                 ret = print_extent_state(info, subvolid);
9621                 goto close_out;
9622         }
9623         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9624
9625         if (!extent_buffer_uptodate(info->tree_root->node) ||
9626             !extent_buffer_uptodate(info->dev_root->node) ||
9627             !extent_buffer_uptodate(info->chunk_root->node)) {
9628                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9629                 ret = -EIO;
9630                 goto close_out;
9631         }
9632
9633         if (init_extent_tree || init_csum_tree) {
9634                 struct btrfs_trans_handle *trans;
9635
9636                 trans = btrfs_start_transaction(info->extent_root, 0);
9637                 if (IS_ERR(trans)) {
9638                         fprintf(stderr, "Error starting transaction\n");
9639                         ret = PTR_ERR(trans);
9640                         goto close_out;
9641                 }
9642
9643                 if (init_extent_tree) {
9644                         printf("Creating a new extent tree\n");
9645                         ret = reinit_extent_tree(trans, info);
9646                         if (ret)
9647                                 goto close_out;
9648                 }
9649
9650                 if (init_csum_tree) {
9651                         fprintf(stderr, "Reinit crc root\n");
9652                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9653                         if (ret) {
9654                                 fprintf(stderr, "crc root initialization failed\n");
9655                                 ret = -EIO;
9656                                 goto close_out;
9657                         }
9658
9659                         ret = fill_csum_tree(trans, info->csum_root,
9660                                              init_extent_tree);
9661                         if (ret) {
9662                                 fprintf(stderr, "crc refilling failed\n");
9663                                 return -EIO;
9664                         }
9665                 }
9666                 /*
9667                  * Ok now we commit and run the normal fsck, which will add
9668                  * extent entries for all of the items it finds.
9669                  */
9670                 ret = btrfs_commit_transaction(trans, info->extent_root);
9671                 if (ret)
9672                         goto close_out;
9673         }
9674         if (!extent_buffer_uptodate(info->extent_root->node)) {
9675                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9676                 ret = -EIO;
9677                 goto close_out;
9678         }
9679         if (!extent_buffer_uptodate(info->csum_root->node)) {
9680                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9681                 ret = -EIO;
9682                 goto close_out;
9683         }
9684
9685         if (!ctx.progress_enabled)
9686                 fprintf(stderr, "checking extents\n");
9687         ret = check_chunks_and_extents(root);
9688         if (ret)
9689                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9690
9691         ret = repair_root_items(info);
9692         if (ret < 0)
9693                 goto close_out;
9694         if (repair) {
9695                 fprintf(stderr, "Fixed %d roots.\n", ret);
9696                 ret = 0;
9697         } else if (ret > 0) {
9698                 fprintf(stderr,
9699                        "Found %d roots with an outdated root item.\n",
9700                        ret);
9701                 fprintf(stderr,
9702                         "Please run a filesystem check with the option --repair to fix them.\n");
9703                 ret = 1;
9704                 goto close_out;
9705         }
9706
9707         if (!ctx.progress_enabled)
9708                 fprintf(stderr, "checking free space cache\n");
9709         ret = check_space_cache(root);
9710         if (ret)
9711                 goto out;
9712
9713         /*
9714          * We used to have to have these hole extents in between our real
9715          * extents so if we don't have this flag set we need to make sure there
9716          * are no gaps in the file extents for inodes, otherwise we can just
9717          * ignore it when this happens.
9718          */
9719         no_holes = btrfs_fs_incompat(root->fs_info,
9720                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9721         if (!ctx.progress_enabled)
9722                 fprintf(stderr, "checking fs roots\n");
9723         ret = check_fs_roots(root, &root_cache);
9724         if (ret)
9725                 goto out;
9726
9727         fprintf(stderr, "checking csums\n");
9728         ret = check_csums(root);
9729         if (ret)
9730                 goto out;
9731
9732         fprintf(stderr, "checking root refs\n");
9733         ret = check_root_refs(root, &root_cache);
9734         if (ret)
9735                 goto out;
9736
9737         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9738                 struct extent_buffer *eb;
9739
9740                 eb = list_first_entry(&root->fs_info->recow_ebs,
9741                                       struct extent_buffer, recow);
9742                 list_del_init(&eb->recow);
9743                 ret = recow_extent_buffer(root, eb);
9744                 if (ret)
9745                         break;
9746         }
9747
9748         while (!list_empty(&delete_items)) {
9749                 struct bad_item *bad;
9750
9751                 bad = list_first_entry(&delete_items, struct bad_item, list);
9752                 list_del_init(&bad->list);
9753                 if (repair)
9754                         ret = delete_bad_item(root, bad);
9755                 free(bad);
9756         }
9757
9758         if (info->quota_enabled) {
9759                 int err;
9760                 fprintf(stderr, "checking quota groups\n");
9761                 err = qgroup_verify_all(info);
9762                 if (err)
9763                         goto out;
9764         }
9765
9766         if (!list_empty(&root->fs_info->recow_ebs)) {
9767                 fprintf(stderr, "Transid errors in file system\n");
9768                 ret = 1;
9769         }
9770 out:
9771         print_qgroup_report(0);
9772         if (found_old_backref) { /*
9773                  * there was a disk format change when mixed
9774                  * backref was in testing tree. The old format
9775                  * existed about one week.
9776                  */
9777                 printf("\n * Found old mixed backref format. "
9778                        "The old format is not supported! *"
9779                        "\n * Please mount the FS in readonly mode, "
9780                        "backup data and re-format the FS. *\n\n");
9781                 ret = 1;
9782         }
9783         printf("found %llu bytes used err is %d\n",
9784                (unsigned long long)bytes_used, ret);
9785         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9786         printf("total tree bytes: %llu\n",
9787                (unsigned long long)total_btree_bytes);
9788         printf("total fs tree bytes: %llu\n",
9789                (unsigned long long)total_fs_tree_bytes);
9790         printf("total extent tree bytes: %llu\n",
9791                (unsigned long long)total_extent_tree_bytes);
9792         printf("btree space waste bytes: %llu\n",
9793                (unsigned long long)btree_space_waste);
9794         printf("file data blocks allocated: %llu\n referenced %llu\n",
9795                 (unsigned long long)data_bytes_allocated,
9796                 (unsigned long long)data_bytes_referenced);
9797
9798         free_root_recs_tree(&root_cache);
9799 close_out:
9800         close_ctree(root);
9801 err_out:
9802         if (ctx.progress_enabled)
9803                 task_deinit(ctx.info);
9804
9805         return ret;
9806 }