btrfs-progs: return errors from get_root_rec, fail in callers
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "btrfsck.h"
39 #include "qgroup-verify.h"
40 #include "rbtree-utils.h"
41 #include "backref.h"
42 #include "ulist.h"
43
44 enum task_position {
45         TASK_EXTENTS,
46         TASK_FREE_SPACE,
47         TASK_FS_ROOTS,
48         TASK_NOTHING, /* have to be the last element */
49 };
50
51 struct task_ctx {
52         int progress_enabled;
53         enum task_position tp;
54
55         struct task_info *info;
56 };
57
58 static u64 bytes_used = 0;
59 static u64 total_csum_bytes = 0;
60 static u64 total_btree_bytes = 0;
61 static u64 total_fs_tree_bytes = 0;
62 static u64 total_extent_tree_bytes = 0;
63 static u64 btree_space_waste = 0;
64 static u64 data_bytes_allocated = 0;
65 static u64 data_bytes_referenced = 0;
66 static int found_old_backref = 0;
67 static LIST_HEAD(duplicate_extents);
68 static LIST_HEAD(delete_items);
69 static int repair = 0;
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75
76 static void *print_status_check(void *p)
77 {
78         struct task_ctx *priv = p;
79         const char work_indicator[] = { '.', 'o', 'O', 'o' };
80         uint32_t count = 0;
81         static char *task_position_string[] = {
82                 "checking extents",
83                 "checking free space cache",
84                 "checking fs roots",
85         };
86
87         task_period_start(priv->info, 1000 /* 1s */);
88
89         if (priv->tp == TASK_NOTHING)
90                 return NULL;
91
92         while (1) {
93                 printf("%s [%c]\r", task_position_string[priv->tp],
94                                 work_indicator[count % 4]);
95                 count++;
96                 fflush(stdout);
97                 task_period_wait(priv->info);
98         }
99         return NULL;
100 }
101
102 static int print_status_return(void *p)
103 {
104         printf("\n");
105         fflush(stdout);
106
107         return 0;
108 }
109
110 struct extent_backref {
111         struct list_head list;
112         unsigned int is_data:1;
113         unsigned int found_extent_tree:1;
114         unsigned int full_backref:1;
115         unsigned int found_ref:1;
116         unsigned int broken:1;
117 };
118
119 struct data_backref {
120         struct extent_backref node;
121         union {
122                 u64 parent;
123                 u64 root;
124         };
125         u64 owner;
126         u64 offset;
127         u64 disk_bytenr;
128         u64 bytes;
129         u64 ram_bytes;
130         u32 num_refs;
131         u32 found_ref;
132 };
133
134 /*
135  * Much like data_backref, just removed the undetermined members
136  * and change it to use list_head.
137  * During extent scan, it is stored in root->orphan_data_extent.
138  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
139  */
140 struct orphan_data_extent {
141         struct list_head list;
142         u64 root;
143         u64 objectid;
144         u64 offset;
145         u64 disk_bytenr;
146         u64 disk_len;
147 };
148
149 struct tree_backref {
150         struct extent_backref node;
151         union {
152                 u64 parent;
153                 u64 root;
154         };
155 };
156
157 struct extent_record {
158         struct list_head backrefs;
159         struct list_head dups;
160         struct list_head list;
161         struct cache_extent cache;
162         struct btrfs_disk_key parent_key;
163         u64 start;
164         u64 max_size;
165         u64 nr;
166         u64 refs;
167         u64 extent_item_refs;
168         u64 generation;
169         u64 parent_generation;
170         u64 info_objectid;
171         u32 num_duplicates;
172         u8 info_level;
173         int flag_block_full_backref;
174         unsigned int found_rec:1;
175         unsigned int content_checked:1;
176         unsigned int owner_ref_checked:1;
177         unsigned int is_root:1;
178         unsigned int metadata:1;
179         unsigned int bad_full_backref:1;
180         unsigned int crossing_stripes:1;
181         unsigned int wrong_chunk_type:1;
182 };
183
184 struct inode_backref {
185         struct list_head list;
186         unsigned int found_dir_item:1;
187         unsigned int found_dir_index:1;
188         unsigned int found_inode_ref:1;
189         unsigned int filetype:8;
190         int errors;
191         unsigned int ref_type;
192         u64 dir;
193         u64 index;
194         u16 namelen;
195         char name[0];
196 };
197
198 struct root_item_record {
199         struct list_head list;
200         u64 objectid;
201         u64 bytenr;
202         u64 last_snapshot;
203         u8 level;
204         u8 drop_level;
205         int level_size;
206         struct btrfs_key drop_key;
207 };
208
209 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
210 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
211 #define REF_ERR_NO_INODE_REF            (1 << 2)
212 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
213 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
214 #define REF_ERR_DUP_INODE_REF           (1 << 5)
215 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
216 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
217 #define REF_ERR_NAME_TOO_LONG           (1 << 8) // 100
218 #define REF_ERR_NO_ROOT_REF             (1 << 9)
219 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
220 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
221 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
222
223 struct file_extent_hole {
224         struct rb_node node;
225         u64 start;
226         u64 len;
227 };
228
229 /* Compatible function to allow reuse of old codes */
230 static u64 first_extent_gap(struct rb_root *holes)
231 {
232         struct file_extent_hole *hole;
233
234         if (RB_EMPTY_ROOT(holes))
235                 return (u64)-1;
236
237         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
238         return hole->start;
239 }
240
241 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
242 {
243         struct file_extent_hole *hole1;
244         struct file_extent_hole *hole2;
245
246         hole1 = rb_entry(node1, struct file_extent_hole, node);
247         hole2 = rb_entry(node2, struct file_extent_hole, node);
248
249         if (hole1->start > hole2->start)
250                 return -1;
251         if (hole1->start < hole2->start)
252                 return 1;
253         /* Now hole1->start == hole2->start */
254         if (hole1->len >= hole2->len)
255                 /*
256                  * Hole 1 will be merge center
257                  * Same hole will be merged later
258                  */
259                 return -1;
260         /* Hole 2 will be merge center */
261         return 1;
262 }
263
264 /*
265  * Add a hole to the record
266  *
267  * This will do hole merge for copy_file_extent_holes(),
268  * which will ensure there won't be continuous holes.
269  */
270 static int add_file_extent_hole(struct rb_root *holes,
271                                 u64 start, u64 len)
272 {
273         struct file_extent_hole *hole;
274         struct file_extent_hole *prev = NULL;
275         struct file_extent_hole *next = NULL;
276
277         hole = malloc(sizeof(*hole));
278         if (!hole)
279                 return -ENOMEM;
280         hole->start = start;
281         hole->len = len;
282         /* Since compare will not return 0, no -EEXIST will happen */
283         rb_insert(holes, &hole->node, compare_hole);
284
285         /* simple merge with previous hole */
286         if (rb_prev(&hole->node))
287                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
288                                 node);
289         if (prev && prev->start + prev->len >= hole->start) {
290                 hole->len = hole->start + hole->len - prev->start;
291                 hole->start = prev->start;
292                 rb_erase(&prev->node, holes);
293                 free(prev);
294                 prev = NULL;
295         }
296
297         /* iterate merge with next holes */
298         while (1) {
299                 if (!rb_next(&hole->node))
300                         break;
301                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
302                                         node);
303                 if (hole->start + hole->len >= next->start) {
304                         if (hole->start + hole->len <= next->start + next->len)
305                                 hole->len = next->start + next->len -
306                                             hole->start;
307                         rb_erase(&next->node, holes);
308                         free(next);
309                         next = NULL;
310                 } else
311                         break;
312         }
313         return 0;
314 }
315
316 static int compare_hole_range(struct rb_node *node, void *data)
317 {
318         struct file_extent_hole *hole;
319         u64 start;
320
321         hole = (struct file_extent_hole *)data;
322         start = hole->start;
323
324         hole = rb_entry(node, struct file_extent_hole, node);
325         if (start < hole->start)
326                 return -1;
327         if (start >= hole->start && start < hole->start + hole->len)
328                 return 0;
329         return 1;
330 }
331
332 /*
333  * Delete a hole in the record
334  *
335  * This will do the hole split and is much restrict than add.
336  */
337 static int del_file_extent_hole(struct rb_root *holes,
338                                 u64 start, u64 len)
339 {
340         struct file_extent_hole *hole;
341         struct file_extent_hole tmp;
342         u64 prev_start = 0;
343         u64 prev_len = 0;
344         u64 next_start = 0;
345         u64 next_len = 0;
346         struct rb_node *node;
347         int have_prev = 0;
348         int have_next = 0;
349         int ret = 0;
350
351         tmp.start = start;
352         tmp.len = len;
353         node = rb_search(holes, &tmp, compare_hole_range, NULL);
354         if (!node)
355                 return -EEXIST;
356         hole = rb_entry(node, struct file_extent_hole, node);
357         if (start + len > hole->start + hole->len)
358                 return -EEXIST;
359
360         /*
361          * Now there will be no overflap, delete the hole and re-add the
362          * split(s) if they exists.
363          */
364         if (start > hole->start) {
365                 prev_start = hole->start;
366                 prev_len = start - hole->start;
367                 have_prev = 1;
368         }
369         if (hole->start + hole->len > start + len) {
370                 next_start = start + len;
371                 next_len = hole->start + hole->len - start - len;
372                 have_next = 1;
373         }
374         rb_erase(node, holes);
375         free(hole);
376         if (have_prev) {
377                 ret = add_file_extent_hole(holes, prev_start, prev_len);
378                 if (ret < 0)
379                         return ret;
380         }
381         if (have_next) {
382                 ret = add_file_extent_hole(holes, next_start, next_len);
383                 if (ret < 0)
384                         return ret;
385         }
386         return 0;
387 }
388
389 static int copy_file_extent_holes(struct rb_root *dst,
390                                   struct rb_root *src)
391 {
392         struct file_extent_hole *hole;
393         struct rb_node *node;
394         int ret = 0;
395
396         node = rb_first(src);
397         while (node) {
398                 hole = rb_entry(node, struct file_extent_hole, node);
399                 ret = add_file_extent_hole(dst, hole->start, hole->len);
400                 if (ret)
401                         break;
402                 node = rb_next(node);
403         }
404         return ret;
405 }
406
407 static void free_file_extent_holes(struct rb_root *holes)
408 {
409         struct rb_node *node;
410         struct file_extent_hole *hole;
411
412         node = rb_first(holes);
413         while (node) {
414                 hole = rb_entry(node, struct file_extent_hole, node);
415                 rb_erase(node, holes);
416                 free(hole);
417                 node = rb_first(holes);
418         }
419 }
420
421 struct inode_record {
422         struct list_head backrefs;
423         unsigned int checked:1;
424         unsigned int merging:1;
425         unsigned int found_inode_item:1;
426         unsigned int found_dir_item:1;
427         unsigned int found_file_extent:1;
428         unsigned int found_csum_item:1;
429         unsigned int some_csum_missing:1;
430         unsigned int nodatasum:1;
431         int errors;
432
433         u64 ino;
434         u32 nlink;
435         u32 imode;
436         u64 isize;
437         u64 nbytes;
438
439         u32 found_link;
440         u64 found_size;
441         u64 extent_start;
442         u64 extent_end;
443         struct rb_root holes;
444         struct list_head orphan_extents;
445
446         u32 refs;
447 };
448
449 #define I_ERR_NO_INODE_ITEM             (1 << 0)
450 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
451 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
452 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
453 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
454 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
455 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
456 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
457 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) // 100
458 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
459 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) // 400
460 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
461 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
462 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
463 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
464
465 struct root_backref {
466         struct list_head list;
467         unsigned int found_dir_item:1;
468         unsigned int found_dir_index:1;
469         unsigned int found_back_ref:1;
470         unsigned int found_forward_ref:1;
471         unsigned int reachable:1;
472         int errors;
473         u64 ref_root;
474         u64 dir;
475         u64 index;
476         u16 namelen;
477         char name[0];
478 };
479
480 struct root_record {
481         struct list_head backrefs;
482         struct cache_extent cache;
483         unsigned int found_root_item:1;
484         u64 objectid;
485         u32 found_ref;
486 };
487
488 struct ptr_node {
489         struct cache_extent cache;
490         void *data;
491 };
492
493 struct shared_node {
494         struct cache_extent cache;
495         struct cache_tree root_cache;
496         struct cache_tree inode_cache;
497         struct inode_record *current;
498         u32 refs;
499 };
500
501 struct block_info {
502         u64 start;
503         u32 size;
504 };
505
506 struct walk_control {
507         struct cache_tree shared;
508         struct shared_node *nodes[BTRFS_MAX_LEVEL];
509         int active_node;
510         int root_level;
511 };
512
513 struct bad_item {
514         struct btrfs_key key;
515         u64 root_id;
516         struct list_head list;
517 };
518
519 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
520
521 static void record_root_in_trans(struct btrfs_trans_handle *trans,
522                                  struct btrfs_root *root)
523 {
524         if (root->last_trans != trans->transid) {
525                 root->track_dirty = 1;
526                 root->last_trans = trans->transid;
527                 root->commit_root = root->node;
528                 extent_buffer_get(root->node);
529         }
530 }
531
532 static u8 imode_to_type(u32 imode)
533 {
534 #define S_SHIFT 12
535         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
536                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
537                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
538                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
539                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
540                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
541                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
542                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
543         };
544
545         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
546 #undef S_SHIFT
547 }
548
549 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
550 {
551         struct device_record *rec1;
552         struct device_record *rec2;
553
554         rec1 = rb_entry(node1, struct device_record, node);
555         rec2 = rb_entry(node2, struct device_record, node);
556         if (rec1->devid > rec2->devid)
557                 return -1;
558         else if (rec1->devid < rec2->devid)
559                 return 1;
560         else
561                 return 0;
562 }
563
564 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
565 {
566         struct inode_record *rec;
567         struct inode_backref *backref;
568         struct inode_backref *orig;
569         struct inode_backref *tmp;
570         struct orphan_data_extent *src_orphan;
571         struct orphan_data_extent *dst_orphan;
572         size_t size;
573         int ret;
574
575         rec = malloc(sizeof(*rec));
576         if (!rec)
577                 return ERR_PTR(-ENOMEM);
578         memcpy(rec, orig_rec, sizeof(*rec));
579         rec->refs = 1;
580         INIT_LIST_HEAD(&rec->backrefs);
581         INIT_LIST_HEAD(&rec->orphan_extents);
582         rec->holes = RB_ROOT;
583
584         list_for_each_entry(orig, &orig_rec->backrefs, list) {
585                 size = sizeof(*orig) + orig->namelen + 1;
586                 backref = malloc(size);
587                 if (!backref) {
588                         ret = -ENOMEM;
589                         goto cleanup;
590                 }
591                 memcpy(backref, orig, size);
592                 list_add_tail(&backref->list, &rec->backrefs);
593         }
594         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
595                 dst_orphan = malloc(sizeof(*dst_orphan));
596                 if (!dst_orphan) {
597                         ret = -ENOMEM;
598                         goto cleanup;
599                 }
600                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
601                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
602         }
603         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
604         BUG_ON(ret < 0);
605
606         return rec;
607
608 cleanup:
609         if (!list_empty(&rec->backrefs))
610                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
611                         list_del(&orig->list);
612                         free(orig);
613                 }
614
615         if (!list_empty(&rec->orphan_extents))
616                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
617                         list_del(&orig->list);
618                         free(orig);
619                 }
620
621         free(rec);
622
623         return ERR_PTR(ret);
624 }
625
626 static void print_orphan_data_extents(struct list_head *orphan_extents,
627                                       u64 objectid)
628 {
629         struct orphan_data_extent *orphan;
630
631         if (list_empty(orphan_extents))
632                 return;
633         printf("The following data extent is lost in tree %llu:\n",
634                objectid);
635         list_for_each_entry(orphan, orphan_extents, list) {
636                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
637                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
638                        orphan->disk_len);
639         }
640 }
641
642 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
643 {
644         u64 root_objectid = root->root_key.objectid;
645         int errors = rec->errors;
646
647         if (!errors)
648                 return;
649         /* reloc root errors, we print its corresponding fs root objectid*/
650         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
651                 root_objectid = root->root_key.offset;
652                 fprintf(stderr, "reloc");
653         }
654         fprintf(stderr, "root %llu inode %llu errors %x",
655                 (unsigned long long) root_objectid,
656                 (unsigned long long) rec->ino, rec->errors);
657
658         if (errors & I_ERR_NO_INODE_ITEM)
659                 fprintf(stderr, ", no inode item");
660         if (errors & I_ERR_NO_ORPHAN_ITEM)
661                 fprintf(stderr, ", no orphan item");
662         if (errors & I_ERR_DUP_INODE_ITEM)
663                 fprintf(stderr, ", dup inode item");
664         if (errors & I_ERR_DUP_DIR_INDEX)
665                 fprintf(stderr, ", dup dir index");
666         if (errors & I_ERR_ODD_DIR_ITEM)
667                 fprintf(stderr, ", odd dir item");
668         if (errors & I_ERR_ODD_FILE_EXTENT)
669                 fprintf(stderr, ", odd file extent");
670         if (errors & I_ERR_BAD_FILE_EXTENT)
671                 fprintf(stderr, ", bad file extent");
672         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
673                 fprintf(stderr, ", file extent overlap");
674         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
675                 fprintf(stderr, ", file extent discount");
676         if (errors & I_ERR_DIR_ISIZE_WRONG)
677                 fprintf(stderr, ", dir isize wrong");
678         if (errors & I_ERR_FILE_NBYTES_WRONG)
679                 fprintf(stderr, ", nbytes wrong");
680         if (errors & I_ERR_ODD_CSUM_ITEM)
681                 fprintf(stderr, ", odd csum item");
682         if (errors & I_ERR_SOME_CSUM_MISSING)
683                 fprintf(stderr, ", some csum missing");
684         if (errors & I_ERR_LINK_COUNT_WRONG)
685                 fprintf(stderr, ", link count wrong");
686         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
687                 fprintf(stderr, ", orphan file extent");
688         fprintf(stderr, "\n");
689         /* Print the orphan extents if needed */
690         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
691                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
692
693         /* Print the holes if needed */
694         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
695                 struct file_extent_hole *hole;
696                 struct rb_node *node;
697                 int found = 0;
698
699                 node = rb_first(&rec->holes);
700                 fprintf(stderr, "Found file extent holes:\n");
701                 while (node) {
702                         found = 1;
703                         hole = rb_entry(node, struct file_extent_hole, node);
704                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
705                                 hole->start, hole->len);
706                         node = rb_next(node);
707                 }
708                 if (!found)
709                         fprintf(stderr, "\tstart: 0, len: %llu\n",
710                                 round_up(rec->isize, root->sectorsize));
711         }
712 }
713
714 static void print_ref_error(int errors)
715 {
716         if (errors & REF_ERR_NO_DIR_ITEM)
717                 fprintf(stderr, ", no dir item");
718         if (errors & REF_ERR_NO_DIR_INDEX)
719                 fprintf(stderr, ", no dir index");
720         if (errors & REF_ERR_NO_INODE_REF)
721                 fprintf(stderr, ", no inode ref");
722         if (errors & REF_ERR_DUP_DIR_ITEM)
723                 fprintf(stderr, ", dup dir item");
724         if (errors & REF_ERR_DUP_DIR_INDEX)
725                 fprintf(stderr, ", dup dir index");
726         if (errors & REF_ERR_DUP_INODE_REF)
727                 fprintf(stderr, ", dup inode ref");
728         if (errors & REF_ERR_INDEX_UNMATCH)
729                 fprintf(stderr, ", index unmatch");
730         if (errors & REF_ERR_FILETYPE_UNMATCH)
731                 fprintf(stderr, ", filetype unmatch");
732         if (errors & REF_ERR_NAME_TOO_LONG)
733                 fprintf(stderr, ", name too long");
734         if (errors & REF_ERR_NO_ROOT_REF)
735                 fprintf(stderr, ", no root ref");
736         if (errors & REF_ERR_NO_ROOT_BACKREF)
737                 fprintf(stderr, ", no root backref");
738         if (errors & REF_ERR_DUP_ROOT_REF)
739                 fprintf(stderr, ", dup root ref");
740         if (errors & REF_ERR_DUP_ROOT_BACKREF)
741                 fprintf(stderr, ", dup root backref");
742         fprintf(stderr, "\n");
743 }
744
745 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
746                                           u64 ino, int mod)
747 {
748         struct ptr_node *node;
749         struct cache_extent *cache;
750         struct inode_record *rec = NULL;
751         int ret;
752
753         cache = lookup_cache_extent(inode_cache, ino, 1);
754         if (cache) {
755                 node = container_of(cache, struct ptr_node, cache);
756                 rec = node->data;
757                 if (mod && rec->refs > 1) {
758                         node->data = clone_inode_rec(rec);
759                         if (IS_ERR(node->data))
760                                 return node->data;
761                         rec->refs--;
762                         rec = node->data;
763                 }
764         } else if (mod) {
765                 rec = calloc(1, sizeof(*rec));
766                 if (!rec)
767                         return ERR_PTR(-ENOMEM);
768                 rec->ino = ino;
769                 rec->extent_start = (u64)-1;
770                 rec->refs = 1;
771                 INIT_LIST_HEAD(&rec->backrefs);
772                 INIT_LIST_HEAD(&rec->orphan_extents);
773                 rec->holes = RB_ROOT;
774
775                 node = malloc(sizeof(*node));
776                 if (!node) {
777                         free(rec);
778                         return ERR_PTR(-ENOMEM);
779                 }
780                 node->cache.start = ino;
781                 node->cache.size = 1;
782                 node->data = rec;
783
784                 if (ino == BTRFS_FREE_INO_OBJECTID)
785                         rec->found_link = 1;
786
787                 ret = insert_cache_extent(inode_cache, &node->cache);
788                 if (ret)
789                         return ERR_PTR(-EEXIST);
790         }
791         return rec;
792 }
793
794 static void free_orphan_data_extents(struct list_head *orphan_extents)
795 {
796         struct orphan_data_extent *orphan;
797
798         while (!list_empty(orphan_extents)) {
799                 orphan = list_entry(orphan_extents->next,
800                                     struct orphan_data_extent, list);
801                 list_del(&orphan->list);
802                 free(orphan);
803         }
804 }
805
806 static void free_inode_rec(struct inode_record *rec)
807 {
808         struct inode_backref *backref;
809
810         if (--rec->refs > 0)
811                 return;
812
813         while (!list_empty(&rec->backrefs)) {
814                 backref = list_entry(rec->backrefs.next,
815                                      struct inode_backref, list);
816                 list_del(&backref->list);
817                 free(backref);
818         }
819         free_orphan_data_extents(&rec->orphan_extents);
820         free_file_extent_holes(&rec->holes);
821         free(rec);
822 }
823
824 static int can_free_inode_rec(struct inode_record *rec)
825 {
826         if (!rec->errors && rec->checked && rec->found_inode_item &&
827             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
828                 return 1;
829         return 0;
830 }
831
832 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
833                                  struct inode_record *rec)
834 {
835         struct cache_extent *cache;
836         struct inode_backref *tmp, *backref;
837         struct ptr_node *node;
838         unsigned char filetype;
839
840         if (!rec->found_inode_item)
841                 return;
842
843         filetype = imode_to_type(rec->imode);
844         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
845                 if (backref->found_dir_item && backref->found_dir_index) {
846                         if (backref->filetype != filetype)
847                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
848                         if (!backref->errors && backref->found_inode_ref &&
849                             rec->nlink == rec->found_link) {
850                                 list_del(&backref->list);
851                                 free(backref);
852                         }
853                 }
854         }
855
856         if (!rec->checked || rec->merging)
857                 return;
858
859         if (S_ISDIR(rec->imode)) {
860                 if (rec->found_size != rec->isize)
861                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
862                 if (rec->found_file_extent)
863                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
864         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
865                 if (rec->found_dir_item)
866                         rec->errors |= I_ERR_ODD_DIR_ITEM;
867                 if (rec->found_size != rec->nbytes)
868                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
869                 if (rec->nlink > 0 && !no_holes &&
870                     (rec->extent_end < rec->isize ||
871                      first_extent_gap(&rec->holes) < rec->isize))
872                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
873         }
874
875         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
876                 if (rec->found_csum_item && rec->nodatasum)
877                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
878                 if (rec->some_csum_missing && !rec->nodatasum)
879                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
880         }
881
882         BUG_ON(rec->refs != 1);
883         if (can_free_inode_rec(rec)) {
884                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
885                 node = container_of(cache, struct ptr_node, cache);
886                 BUG_ON(node->data != rec);
887                 remove_cache_extent(inode_cache, &node->cache);
888                 free(node);
889                 free_inode_rec(rec);
890         }
891 }
892
893 static int check_orphan_item(struct btrfs_root *root, u64 ino)
894 {
895         struct btrfs_path path;
896         struct btrfs_key key;
897         int ret;
898
899         key.objectid = BTRFS_ORPHAN_OBJECTID;
900         key.type = BTRFS_ORPHAN_ITEM_KEY;
901         key.offset = ino;
902
903         btrfs_init_path(&path);
904         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
905         btrfs_release_path(&path);
906         if (ret > 0)
907                 ret = -ENOENT;
908         return ret;
909 }
910
911 static int process_inode_item(struct extent_buffer *eb,
912                               int slot, struct btrfs_key *key,
913                               struct shared_node *active_node)
914 {
915         struct inode_record *rec;
916         struct btrfs_inode_item *item;
917
918         rec = active_node->current;
919         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
920         if (rec->found_inode_item) {
921                 rec->errors |= I_ERR_DUP_INODE_ITEM;
922                 return 1;
923         }
924         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
925         rec->nlink = btrfs_inode_nlink(eb, item);
926         rec->isize = btrfs_inode_size(eb, item);
927         rec->nbytes = btrfs_inode_nbytes(eb, item);
928         rec->imode = btrfs_inode_mode(eb, item);
929         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
930                 rec->nodatasum = 1;
931         rec->found_inode_item = 1;
932         if (rec->nlink == 0)
933                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
934         maybe_free_inode_rec(&active_node->inode_cache, rec);
935         return 0;
936 }
937
938 static struct inode_backref *get_inode_backref(struct inode_record *rec,
939                                                 const char *name,
940                                                 int namelen, u64 dir)
941 {
942         struct inode_backref *backref;
943
944         list_for_each_entry(backref, &rec->backrefs, list) {
945                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
946                         break;
947                 if (backref->dir != dir || backref->namelen != namelen)
948                         continue;
949                 if (memcmp(name, backref->name, namelen))
950                         continue;
951                 return backref;
952         }
953
954         backref = malloc(sizeof(*backref) + namelen + 1);
955         memset(backref, 0, sizeof(*backref));
956         backref->dir = dir;
957         backref->namelen = namelen;
958         memcpy(backref->name, name, namelen);
959         backref->name[namelen] = '\0';
960         list_add_tail(&backref->list, &rec->backrefs);
961         return backref;
962 }
963
964 static int add_inode_backref(struct cache_tree *inode_cache,
965                              u64 ino, u64 dir, u64 index,
966                              const char *name, int namelen,
967                              int filetype, int itemtype, int errors)
968 {
969         struct inode_record *rec;
970         struct inode_backref *backref;
971
972         rec = get_inode_rec(inode_cache, ino, 1);
973         BUG_ON(IS_ERR(rec));
974         backref = get_inode_backref(rec, name, namelen, dir);
975         if (errors)
976                 backref->errors |= errors;
977         if (itemtype == BTRFS_DIR_INDEX_KEY) {
978                 if (backref->found_dir_index)
979                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
980                 if (backref->found_inode_ref && backref->index != index)
981                         backref->errors |= REF_ERR_INDEX_UNMATCH;
982                 if (backref->found_dir_item && backref->filetype != filetype)
983                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
984
985                 backref->index = index;
986                 backref->filetype = filetype;
987                 backref->found_dir_index = 1;
988         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
989                 rec->found_link++;
990                 if (backref->found_dir_item)
991                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
992                 if (backref->found_dir_index && backref->filetype != filetype)
993                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
994
995                 backref->filetype = filetype;
996                 backref->found_dir_item = 1;
997         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
998                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
999                 if (backref->found_inode_ref)
1000                         backref->errors |= REF_ERR_DUP_INODE_REF;
1001                 if (backref->found_dir_index && backref->index != index)
1002                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1003                 else
1004                         backref->index = index;
1005
1006                 backref->ref_type = itemtype;
1007                 backref->found_inode_ref = 1;
1008         } else {
1009                 BUG_ON(1);
1010         }
1011
1012         maybe_free_inode_rec(inode_cache, rec);
1013         return 0;
1014 }
1015
1016 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1017                             struct cache_tree *dst_cache)
1018 {
1019         struct inode_backref *backref;
1020         u32 dir_count = 0;
1021         int ret = 0;
1022
1023         dst->merging = 1;
1024         list_for_each_entry(backref, &src->backrefs, list) {
1025                 if (backref->found_dir_index) {
1026                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1027                                         backref->index, backref->name,
1028                                         backref->namelen, backref->filetype,
1029                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1030                 }
1031                 if (backref->found_dir_item) {
1032                         dir_count++;
1033                         add_inode_backref(dst_cache, dst->ino,
1034                                         backref->dir, 0, backref->name,
1035                                         backref->namelen, backref->filetype,
1036                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1037                 }
1038                 if (backref->found_inode_ref) {
1039                         add_inode_backref(dst_cache, dst->ino,
1040                                         backref->dir, backref->index,
1041                                         backref->name, backref->namelen, 0,
1042                                         backref->ref_type, backref->errors);
1043                 }
1044         }
1045
1046         if (src->found_dir_item)
1047                 dst->found_dir_item = 1;
1048         if (src->found_file_extent)
1049                 dst->found_file_extent = 1;
1050         if (src->found_csum_item)
1051                 dst->found_csum_item = 1;
1052         if (src->some_csum_missing)
1053                 dst->some_csum_missing = 1;
1054         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1055                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1056                 if (ret < 0)
1057                         return ret;
1058         }
1059
1060         BUG_ON(src->found_link < dir_count);
1061         dst->found_link += src->found_link - dir_count;
1062         dst->found_size += src->found_size;
1063         if (src->extent_start != (u64)-1) {
1064                 if (dst->extent_start == (u64)-1) {
1065                         dst->extent_start = src->extent_start;
1066                         dst->extent_end = src->extent_end;
1067                 } else {
1068                         if (dst->extent_end > src->extent_start)
1069                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1070                         else if (dst->extent_end < src->extent_start) {
1071                                 ret = add_file_extent_hole(&dst->holes,
1072                                         dst->extent_end,
1073                                         src->extent_start - dst->extent_end);
1074                         }
1075                         if (dst->extent_end < src->extent_end)
1076                                 dst->extent_end = src->extent_end;
1077                 }
1078         }
1079
1080         dst->errors |= src->errors;
1081         if (src->found_inode_item) {
1082                 if (!dst->found_inode_item) {
1083                         dst->nlink = src->nlink;
1084                         dst->isize = src->isize;
1085                         dst->nbytes = src->nbytes;
1086                         dst->imode = src->imode;
1087                         dst->nodatasum = src->nodatasum;
1088                         dst->found_inode_item = 1;
1089                 } else {
1090                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1091                 }
1092         }
1093         dst->merging = 0;
1094
1095         return 0;
1096 }
1097
1098 static int splice_shared_node(struct shared_node *src_node,
1099                               struct shared_node *dst_node)
1100 {
1101         struct cache_extent *cache;
1102         struct ptr_node *node, *ins;
1103         struct cache_tree *src, *dst;
1104         struct inode_record *rec, *conflict;
1105         u64 current_ino = 0;
1106         int splice = 0;
1107         int ret;
1108
1109         if (--src_node->refs == 0)
1110                 splice = 1;
1111         if (src_node->current)
1112                 current_ino = src_node->current->ino;
1113
1114         src = &src_node->root_cache;
1115         dst = &dst_node->root_cache;
1116 again:
1117         cache = search_cache_extent(src, 0);
1118         while (cache) {
1119                 node = container_of(cache, struct ptr_node, cache);
1120                 rec = node->data;
1121                 cache = next_cache_extent(cache);
1122
1123                 if (splice) {
1124                         remove_cache_extent(src, &node->cache);
1125                         ins = node;
1126                 } else {
1127                         ins = malloc(sizeof(*ins));
1128                         ins->cache.start = node->cache.start;
1129                         ins->cache.size = node->cache.size;
1130                         ins->data = rec;
1131                         rec->refs++;
1132                 }
1133                 ret = insert_cache_extent(dst, &ins->cache);
1134                 if (ret == -EEXIST) {
1135                         conflict = get_inode_rec(dst, rec->ino, 1);
1136                         BUG_ON(IS_ERR(conflict));
1137                         merge_inode_recs(rec, conflict, dst);
1138                         if (rec->checked) {
1139                                 conflict->checked = 1;
1140                                 if (dst_node->current == conflict)
1141                                         dst_node->current = NULL;
1142                         }
1143                         maybe_free_inode_rec(dst, conflict);
1144                         free_inode_rec(rec);
1145                         free(ins);
1146                 } else {
1147                         BUG_ON(ret);
1148                 }
1149         }
1150
1151         if (src == &src_node->root_cache) {
1152                 src = &src_node->inode_cache;
1153                 dst = &dst_node->inode_cache;
1154                 goto again;
1155         }
1156
1157         if (current_ino > 0 && (!dst_node->current ||
1158             current_ino > dst_node->current->ino)) {
1159                 if (dst_node->current) {
1160                         dst_node->current->checked = 1;
1161                         maybe_free_inode_rec(dst, dst_node->current);
1162                 }
1163                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1164                 BUG_ON(IS_ERR(dst_node->current));
1165         }
1166         return 0;
1167 }
1168
1169 static void free_inode_ptr(struct cache_extent *cache)
1170 {
1171         struct ptr_node *node;
1172         struct inode_record *rec;
1173
1174         node = container_of(cache, struct ptr_node, cache);
1175         rec = node->data;
1176         free_inode_rec(rec);
1177         free(node);
1178 }
1179
1180 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1181
1182 static struct shared_node *find_shared_node(struct cache_tree *shared,
1183                                             u64 bytenr)
1184 {
1185         struct cache_extent *cache;
1186         struct shared_node *node;
1187
1188         cache = lookup_cache_extent(shared, bytenr, 1);
1189         if (cache) {
1190                 node = container_of(cache, struct shared_node, cache);
1191                 return node;
1192         }
1193         return NULL;
1194 }
1195
1196 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1197 {
1198         int ret;
1199         struct shared_node *node;
1200
1201         node = calloc(1, sizeof(*node));
1202         if (!node)
1203                 return -ENOMEM;
1204         node->cache.start = bytenr;
1205         node->cache.size = 1;
1206         cache_tree_init(&node->root_cache);
1207         cache_tree_init(&node->inode_cache);
1208         node->refs = refs;
1209
1210         ret = insert_cache_extent(shared, &node->cache);
1211
1212         return ret;
1213 }
1214
1215 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1216                              struct walk_control *wc, int level)
1217 {
1218         struct shared_node *node;
1219         struct shared_node *dest;
1220         int ret;
1221
1222         if (level == wc->active_node)
1223                 return 0;
1224
1225         BUG_ON(wc->active_node <= level);
1226         node = find_shared_node(&wc->shared, bytenr);
1227         if (!node) {
1228                 ret = add_shared_node(&wc->shared, bytenr, refs);
1229                 BUG_ON(ret);
1230                 node = find_shared_node(&wc->shared, bytenr);
1231                 wc->nodes[level] = node;
1232                 wc->active_node = level;
1233                 return 0;
1234         }
1235
1236         if (wc->root_level == wc->active_node &&
1237             btrfs_root_refs(&root->root_item) == 0) {
1238                 if (--node->refs == 0) {
1239                         free_inode_recs_tree(&node->root_cache);
1240                         free_inode_recs_tree(&node->inode_cache);
1241                         remove_cache_extent(&wc->shared, &node->cache);
1242                         free(node);
1243                 }
1244                 return 1;
1245         }
1246
1247         dest = wc->nodes[wc->active_node];
1248         splice_shared_node(node, dest);
1249         if (node->refs == 0) {
1250                 remove_cache_extent(&wc->shared, &node->cache);
1251                 free(node);
1252         }
1253         return 1;
1254 }
1255
1256 static int leave_shared_node(struct btrfs_root *root,
1257                              struct walk_control *wc, int level)
1258 {
1259         struct shared_node *node;
1260         struct shared_node *dest;
1261         int i;
1262
1263         if (level == wc->root_level)
1264                 return 0;
1265
1266         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1267                 if (wc->nodes[i])
1268                         break;
1269         }
1270         BUG_ON(i >= BTRFS_MAX_LEVEL);
1271
1272         node = wc->nodes[wc->active_node];
1273         wc->nodes[wc->active_node] = NULL;
1274         wc->active_node = i;
1275
1276         dest = wc->nodes[wc->active_node];
1277         if (wc->active_node < wc->root_level ||
1278             btrfs_root_refs(&root->root_item) > 0) {
1279                 BUG_ON(node->refs <= 1);
1280                 splice_shared_node(node, dest);
1281         } else {
1282                 BUG_ON(node->refs < 2);
1283                 node->refs--;
1284         }
1285         return 0;
1286 }
1287
1288 /*
1289  * Returns:
1290  * < 0 - on error
1291  * 1   - if the root with id child_root_id is a child of root parent_root_id
1292  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1293  *       has other root(s) as parent(s)
1294  * 2   - if the root child_root_id doesn't have any parent roots
1295  */
1296 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1297                          u64 child_root_id)
1298 {
1299         struct btrfs_path path;
1300         struct btrfs_key key;
1301         struct extent_buffer *leaf;
1302         int has_parent = 0;
1303         int ret;
1304
1305         btrfs_init_path(&path);
1306
1307         key.objectid = parent_root_id;
1308         key.type = BTRFS_ROOT_REF_KEY;
1309         key.offset = child_root_id;
1310         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1311                                 0, 0);
1312         if (ret < 0)
1313                 return ret;
1314         btrfs_release_path(&path);
1315         if (!ret)
1316                 return 1;
1317
1318         key.objectid = child_root_id;
1319         key.type = BTRFS_ROOT_BACKREF_KEY;
1320         key.offset = 0;
1321         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1322                                 0, 0);
1323         if (ret < 0)
1324                 goto out;
1325
1326         while (1) {
1327                 leaf = path.nodes[0];
1328                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1329                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1330                         if (ret)
1331                                 break;
1332                         leaf = path.nodes[0];
1333                 }
1334
1335                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1336                 if (key.objectid != child_root_id ||
1337                     key.type != BTRFS_ROOT_BACKREF_KEY)
1338                         break;
1339
1340                 has_parent = 1;
1341
1342                 if (key.offset == parent_root_id) {
1343                         btrfs_release_path(&path);
1344                         return 1;
1345                 }
1346
1347                 path.slots[0]++;
1348         }
1349 out:
1350         btrfs_release_path(&path);
1351         if (ret < 0)
1352                 return ret;
1353         return has_parent ? 0 : 2;
1354 }
1355
1356 static int process_dir_item(struct btrfs_root *root,
1357                             struct extent_buffer *eb,
1358                             int slot, struct btrfs_key *key,
1359                             struct shared_node *active_node)
1360 {
1361         u32 total;
1362         u32 cur = 0;
1363         u32 len;
1364         u32 name_len;
1365         u32 data_len;
1366         int error;
1367         int nritems = 0;
1368         int filetype;
1369         struct btrfs_dir_item *di;
1370         struct inode_record *rec;
1371         struct cache_tree *root_cache;
1372         struct cache_tree *inode_cache;
1373         struct btrfs_key location;
1374         char namebuf[BTRFS_NAME_LEN];
1375
1376         root_cache = &active_node->root_cache;
1377         inode_cache = &active_node->inode_cache;
1378         rec = active_node->current;
1379         rec->found_dir_item = 1;
1380
1381         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1382         total = btrfs_item_size_nr(eb, slot);
1383         while (cur < total) {
1384                 nritems++;
1385                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1386                 name_len = btrfs_dir_name_len(eb, di);
1387                 data_len = btrfs_dir_data_len(eb, di);
1388                 filetype = btrfs_dir_type(eb, di);
1389
1390                 rec->found_size += name_len;
1391                 if (name_len <= BTRFS_NAME_LEN) {
1392                         len = name_len;
1393                         error = 0;
1394                 } else {
1395                         len = BTRFS_NAME_LEN;
1396                         error = REF_ERR_NAME_TOO_LONG;
1397                 }
1398                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1399
1400                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1401                         add_inode_backref(inode_cache, location.objectid,
1402                                           key->objectid, key->offset, namebuf,
1403                                           len, filetype, key->type, error);
1404                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1405                         add_inode_backref(root_cache, location.objectid,
1406                                           key->objectid, key->offset,
1407                                           namebuf, len, filetype,
1408                                           key->type, error);
1409                 } else {
1410                         fprintf(stderr, "invalid location in dir item %u\n",
1411                                 location.type);
1412                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1413                                           key->objectid, key->offset, namebuf,
1414                                           len, filetype, key->type, error);
1415                 }
1416
1417                 len = sizeof(*di) + name_len + data_len;
1418                 di = (struct btrfs_dir_item *)((char *)di + len);
1419                 cur += len;
1420         }
1421         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1422                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1423
1424         return 0;
1425 }
1426
1427 static int process_inode_ref(struct extent_buffer *eb,
1428                              int slot, struct btrfs_key *key,
1429                              struct shared_node *active_node)
1430 {
1431         u32 total;
1432         u32 cur = 0;
1433         u32 len;
1434         u32 name_len;
1435         u64 index;
1436         int error;
1437         struct cache_tree *inode_cache;
1438         struct btrfs_inode_ref *ref;
1439         char namebuf[BTRFS_NAME_LEN];
1440
1441         inode_cache = &active_node->inode_cache;
1442
1443         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1444         total = btrfs_item_size_nr(eb, slot);
1445         while (cur < total) {
1446                 name_len = btrfs_inode_ref_name_len(eb, ref);
1447                 index = btrfs_inode_ref_index(eb, ref);
1448                 if (name_len <= BTRFS_NAME_LEN) {
1449                         len = name_len;
1450                         error = 0;
1451                 } else {
1452                         len = BTRFS_NAME_LEN;
1453                         error = REF_ERR_NAME_TOO_LONG;
1454                 }
1455                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1456                 add_inode_backref(inode_cache, key->objectid, key->offset,
1457                                   index, namebuf, len, 0, key->type, error);
1458
1459                 len = sizeof(*ref) + name_len;
1460                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1461                 cur += len;
1462         }
1463         return 0;
1464 }
1465
1466 static int process_inode_extref(struct extent_buffer *eb,
1467                                 int slot, struct btrfs_key *key,
1468                                 struct shared_node *active_node)
1469 {
1470         u32 total;
1471         u32 cur = 0;
1472         u32 len;
1473         u32 name_len;
1474         u64 index;
1475         u64 parent;
1476         int error;
1477         struct cache_tree *inode_cache;
1478         struct btrfs_inode_extref *extref;
1479         char namebuf[BTRFS_NAME_LEN];
1480
1481         inode_cache = &active_node->inode_cache;
1482
1483         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1484         total = btrfs_item_size_nr(eb, slot);
1485         while (cur < total) {
1486                 name_len = btrfs_inode_extref_name_len(eb, extref);
1487                 index = btrfs_inode_extref_index(eb, extref);
1488                 parent = btrfs_inode_extref_parent(eb, extref);
1489                 if (name_len <= BTRFS_NAME_LEN) {
1490                         len = name_len;
1491                         error = 0;
1492                 } else {
1493                         len = BTRFS_NAME_LEN;
1494                         error = REF_ERR_NAME_TOO_LONG;
1495                 }
1496                 read_extent_buffer(eb, namebuf,
1497                                    (unsigned long)(extref + 1), len);
1498                 add_inode_backref(inode_cache, key->objectid, parent,
1499                                   index, namebuf, len, 0, key->type, error);
1500
1501                 len = sizeof(*extref) + name_len;
1502                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1503                 cur += len;
1504         }
1505         return 0;
1506
1507 }
1508
1509 static int count_csum_range(struct btrfs_root *root, u64 start,
1510                             u64 len, u64 *found)
1511 {
1512         struct btrfs_key key;
1513         struct btrfs_path path;
1514         struct extent_buffer *leaf;
1515         int ret;
1516         size_t size;
1517         *found = 0;
1518         u64 csum_end;
1519         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1520
1521         btrfs_init_path(&path);
1522
1523         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1524         key.offset = start;
1525         key.type = BTRFS_EXTENT_CSUM_KEY;
1526
1527         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1528                                 &key, &path, 0, 0);
1529         if (ret < 0)
1530                 goto out;
1531         if (ret > 0 && path.slots[0] > 0) {
1532                 leaf = path.nodes[0];
1533                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1534                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1535                     key.type == BTRFS_EXTENT_CSUM_KEY)
1536                         path.slots[0]--;
1537         }
1538
1539         while (len > 0) {
1540                 leaf = path.nodes[0];
1541                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1542                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1543                         if (ret > 0)
1544                                 break;
1545                         else if (ret < 0)
1546                                 goto out;
1547                         leaf = path.nodes[0];
1548                 }
1549
1550                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1551                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1552                     key.type != BTRFS_EXTENT_CSUM_KEY)
1553                         break;
1554
1555                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1556                 if (key.offset >= start + len)
1557                         break;
1558
1559                 if (key.offset > start)
1560                         start = key.offset;
1561
1562                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1563                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1564                 if (csum_end > start) {
1565                         size = min(csum_end - start, len);
1566                         len -= size;
1567                         start += size;
1568                         *found += size;
1569                 }
1570
1571                 path.slots[0]++;
1572         }
1573 out:
1574         btrfs_release_path(&path);
1575         if (ret < 0)
1576                 return ret;
1577         return 0;
1578 }
1579
1580 static int process_file_extent(struct btrfs_root *root,
1581                                 struct extent_buffer *eb,
1582                                 int slot, struct btrfs_key *key,
1583                                 struct shared_node *active_node)
1584 {
1585         struct inode_record *rec;
1586         struct btrfs_file_extent_item *fi;
1587         u64 num_bytes = 0;
1588         u64 disk_bytenr = 0;
1589         u64 extent_offset = 0;
1590         u64 mask = root->sectorsize - 1;
1591         int extent_type;
1592         int ret;
1593
1594         rec = active_node->current;
1595         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1596         rec->found_file_extent = 1;
1597
1598         if (rec->extent_start == (u64)-1) {
1599                 rec->extent_start = key->offset;
1600                 rec->extent_end = key->offset;
1601         }
1602
1603         if (rec->extent_end > key->offset)
1604                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1605         else if (rec->extent_end < key->offset) {
1606                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1607                                            key->offset - rec->extent_end);
1608                 if (ret < 0)
1609                         return ret;
1610         }
1611
1612         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1613         extent_type = btrfs_file_extent_type(eb, fi);
1614
1615         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1616                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1617                 if (num_bytes == 0)
1618                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1619                 rec->found_size += num_bytes;
1620                 num_bytes = (num_bytes + mask) & ~mask;
1621         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1622                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1623                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1624                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1625                 extent_offset = btrfs_file_extent_offset(eb, fi);
1626                 if (num_bytes == 0 || (num_bytes & mask))
1627                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1628                 if (num_bytes + extent_offset >
1629                     btrfs_file_extent_ram_bytes(eb, fi))
1630                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1631                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1632                     (btrfs_file_extent_compression(eb, fi) ||
1633                      btrfs_file_extent_encryption(eb, fi) ||
1634                      btrfs_file_extent_other_encoding(eb, fi)))
1635                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1636                 if (disk_bytenr > 0)
1637                         rec->found_size += num_bytes;
1638         } else {
1639                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1640         }
1641         rec->extent_end = key->offset + num_bytes;
1642
1643         /*
1644          * The data reloc tree will copy full extents into its inode and then
1645          * copy the corresponding csums.  Because the extent it copied could be
1646          * a preallocated extent that hasn't been written to yet there may be no
1647          * csums to copy, ergo we won't have csums for our file extent.  This is
1648          * ok so just don't bother checking csums if the inode belongs to the
1649          * data reloc tree.
1650          */
1651         if (disk_bytenr > 0 &&
1652             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1653                 u64 found;
1654                 if (btrfs_file_extent_compression(eb, fi))
1655                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1656                 else
1657                         disk_bytenr += extent_offset;
1658
1659                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1660                 if (ret < 0)
1661                         return ret;
1662                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1663                         if (found > 0)
1664                                 rec->found_csum_item = 1;
1665                         if (found < num_bytes)
1666                                 rec->some_csum_missing = 1;
1667                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1668                         if (found > 0)
1669                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1670                 }
1671         }
1672         return 0;
1673 }
1674
1675 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1676                             struct walk_control *wc)
1677 {
1678         struct btrfs_key key;
1679         u32 nritems;
1680         int i;
1681         int ret = 0;
1682         struct cache_tree *inode_cache;
1683         struct shared_node *active_node;
1684
1685         if (wc->root_level == wc->active_node &&
1686             btrfs_root_refs(&root->root_item) == 0)
1687                 return 0;
1688
1689         active_node = wc->nodes[wc->active_node];
1690         inode_cache = &active_node->inode_cache;
1691         nritems = btrfs_header_nritems(eb);
1692         for (i = 0; i < nritems; i++) {
1693                 btrfs_item_key_to_cpu(eb, &key, i);
1694
1695                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1696                         continue;
1697                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1698                         continue;
1699
1700                 if (active_node->current == NULL ||
1701                     active_node->current->ino < key.objectid) {
1702                         if (active_node->current) {
1703                                 active_node->current->checked = 1;
1704                                 maybe_free_inode_rec(inode_cache,
1705                                                      active_node->current);
1706                         }
1707                         active_node->current = get_inode_rec(inode_cache,
1708                                                              key.objectid, 1);
1709                         BUG_ON(IS_ERR(active_node->current));
1710                 }
1711                 switch (key.type) {
1712                 case BTRFS_DIR_ITEM_KEY:
1713                 case BTRFS_DIR_INDEX_KEY:
1714                         ret = process_dir_item(root, eb, i, &key, active_node);
1715                         break;
1716                 case BTRFS_INODE_REF_KEY:
1717                         ret = process_inode_ref(eb, i, &key, active_node);
1718                         break;
1719                 case BTRFS_INODE_EXTREF_KEY:
1720                         ret = process_inode_extref(eb, i, &key, active_node);
1721                         break;
1722                 case BTRFS_INODE_ITEM_KEY:
1723                         ret = process_inode_item(eb, i, &key, active_node);
1724                         break;
1725                 case BTRFS_EXTENT_DATA_KEY:
1726                         ret = process_file_extent(root, eb, i, &key,
1727                                                   active_node);
1728                         break;
1729                 default:
1730                         break;
1731                 };
1732         }
1733         return ret;
1734 }
1735
1736 static void reada_walk_down(struct btrfs_root *root,
1737                             struct extent_buffer *node, int slot)
1738 {
1739         u64 bytenr;
1740         u64 ptr_gen;
1741         u32 nritems;
1742         u32 blocksize;
1743         int i;
1744         int level;
1745
1746         level = btrfs_header_level(node);
1747         if (level != 1)
1748                 return;
1749
1750         nritems = btrfs_header_nritems(node);
1751         blocksize = btrfs_level_size(root, level - 1);
1752         for (i = slot; i < nritems; i++) {
1753                 bytenr = btrfs_node_blockptr(node, i);
1754                 ptr_gen = btrfs_node_ptr_generation(node, i);
1755                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1756         }
1757 }
1758
1759 /*
1760  * Check the child node/leaf by the following condition:
1761  * 1. the first item key of the node/leaf should be the same with the one
1762  *    in parent.
1763  * 2. block in parent node should match the child node/leaf.
1764  * 3. generation of parent node and child's header should be consistent.
1765  *
1766  * Or the child node/leaf pointed by the key in parent is not valid.
1767  *
1768  * We hope to check leaf owner too, but since subvol may share leaves,
1769  * which makes leaf owner check not so strong, key check should be
1770  * sufficient enough for that case.
1771  */
1772 static int check_child_node(struct btrfs_root *root,
1773                             struct extent_buffer *parent, int slot,
1774                             struct extent_buffer *child)
1775 {
1776         struct btrfs_key parent_key;
1777         struct btrfs_key child_key;
1778         int ret = 0;
1779
1780         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1781         if (btrfs_header_level(child) == 0)
1782                 btrfs_item_key_to_cpu(child, &child_key, 0);
1783         else
1784                 btrfs_node_key_to_cpu(child, &child_key, 0);
1785
1786         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1787                 ret = -EINVAL;
1788                 fprintf(stderr,
1789                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1790                         parent_key.objectid, parent_key.type, parent_key.offset,
1791                         child_key.objectid, child_key.type, child_key.offset);
1792         }
1793         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1794                 ret = -EINVAL;
1795                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1796                         btrfs_node_blockptr(parent, slot),
1797                         btrfs_header_bytenr(child));
1798         }
1799         if (btrfs_node_ptr_generation(parent, slot) !=
1800             btrfs_header_generation(child)) {
1801                 ret = -EINVAL;
1802                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1803                         btrfs_header_generation(child),
1804                         btrfs_node_ptr_generation(parent, slot));
1805         }
1806         return ret;
1807 }
1808
1809 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1810                           struct walk_control *wc, int *level)
1811 {
1812         enum btrfs_tree_block_status status;
1813         u64 bytenr;
1814         u64 ptr_gen;
1815         struct extent_buffer *next;
1816         struct extent_buffer *cur;
1817         u32 blocksize;
1818         int ret, err = 0;
1819         u64 refs;
1820
1821         WARN_ON(*level < 0);
1822         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1823         ret = btrfs_lookup_extent_info(NULL, root,
1824                                        path->nodes[*level]->start,
1825                                        *level, 1, &refs, NULL);
1826         if (ret < 0) {
1827                 err = ret;
1828                 goto out;
1829         }
1830
1831         if (refs > 1) {
1832                 ret = enter_shared_node(root, path->nodes[*level]->start,
1833                                         refs, wc, *level);
1834                 if (ret > 0) {
1835                         err = ret;
1836                         goto out;
1837                 }
1838         }
1839
1840         while (*level >= 0) {
1841                 WARN_ON(*level < 0);
1842                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1843                 cur = path->nodes[*level];
1844
1845                 if (btrfs_header_level(cur) != *level)
1846                         WARN_ON(1);
1847
1848                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1849                         break;
1850                 if (*level == 0) {
1851                         ret = process_one_leaf(root, cur, wc);
1852                         if (ret < 0)
1853                                 err = ret;
1854                         break;
1855                 }
1856                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1857                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1858                 blocksize = btrfs_level_size(root, *level - 1);
1859                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1860                                                1, &refs, NULL);
1861                 if (ret < 0)
1862                         refs = 0;
1863
1864                 if (refs > 1) {
1865                         ret = enter_shared_node(root, bytenr, refs,
1866                                                 wc, *level - 1);
1867                         if (ret > 0) {
1868                                 path->slots[*level]++;
1869                                 continue;
1870                         }
1871                 }
1872
1873                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1874                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1875                         free_extent_buffer(next);
1876                         reada_walk_down(root, cur, path->slots[*level]);
1877                         next = read_tree_block(root, bytenr, blocksize,
1878                                                ptr_gen);
1879                         if (!extent_buffer_uptodate(next)) {
1880                                 struct btrfs_key node_key;
1881
1882                                 btrfs_node_key_to_cpu(path->nodes[*level],
1883                                                       &node_key,
1884                                                       path->slots[*level]);
1885                                 btrfs_add_corrupt_extent_record(root->fs_info,
1886                                                 &node_key,
1887                                                 path->nodes[*level]->start,
1888                                                 root->leafsize, *level);
1889                                 err = -EIO;
1890                                 goto out;
1891                         }
1892                 }
1893
1894                 ret = check_child_node(root, cur, path->slots[*level], next);
1895                 if (ret) {
1896                         err = ret;
1897                         goto out;
1898                 }
1899
1900                 if (btrfs_is_leaf(next))
1901                         status = btrfs_check_leaf(root, NULL, next);
1902                 else
1903                         status = btrfs_check_node(root, NULL, next);
1904                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1905                         free_extent_buffer(next);
1906                         err = -EIO;
1907                         goto out;
1908                 }
1909
1910                 *level = *level - 1;
1911                 free_extent_buffer(path->nodes[*level]);
1912                 path->nodes[*level] = next;
1913                 path->slots[*level] = 0;
1914         }
1915 out:
1916         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1917         return err;
1918 }
1919
1920 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1921                         struct walk_control *wc, int *level)
1922 {
1923         int i;
1924         struct extent_buffer *leaf;
1925
1926         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1927                 leaf = path->nodes[i];
1928                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1929                         path->slots[i]++;
1930                         *level = i;
1931                         return 0;
1932                 } else {
1933                         free_extent_buffer(path->nodes[*level]);
1934                         path->nodes[*level] = NULL;
1935                         BUG_ON(*level > wc->active_node);
1936                         if (*level == wc->active_node)
1937                                 leave_shared_node(root, wc, *level);
1938                         *level = i + 1;
1939                 }
1940         }
1941         return 1;
1942 }
1943
1944 static int check_root_dir(struct inode_record *rec)
1945 {
1946         struct inode_backref *backref;
1947         int ret = -1;
1948
1949         if (!rec->found_inode_item || rec->errors)
1950                 goto out;
1951         if (rec->nlink != 1 || rec->found_link != 0)
1952                 goto out;
1953         if (list_empty(&rec->backrefs))
1954                 goto out;
1955         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1956         if (!backref->found_inode_ref)
1957                 goto out;
1958         if (backref->index != 0 || backref->namelen != 2 ||
1959             memcmp(backref->name, "..", 2))
1960                 goto out;
1961         if (backref->found_dir_index || backref->found_dir_item)
1962                 goto out;
1963         ret = 0;
1964 out:
1965         return ret;
1966 }
1967
1968 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1969                               struct btrfs_root *root, struct btrfs_path *path,
1970                               struct inode_record *rec)
1971 {
1972         struct btrfs_inode_item *ei;
1973         struct btrfs_key key;
1974         int ret;
1975
1976         key.objectid = rec->ino;
1977         key.type = BTRFS_INODE_ITEM_KEY;
1978         key.offset = (u64)-1;
1979
1980         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1981         if (ret < 0)
1982                 goto out;
1983         if (ret) {
1984                 if (!path->slots[0]) {
1985                         ret = -ENOENT;
1986                         goto out;
1987                 }
1988                 path->slots[0]--;
1989                 ret = 0;
1990         }
1991         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1992         if (key.objectid != rec->ino) {
1993                 ret = -ENOENT;
1994                 goto out;
1995         }
1996
1997         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1998                             struct btrfs_inode_item);
1999         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2000         btrfs_mark_buffer_dirty(path->nodes[0]);
2001         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2002         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2003                root->root_key.objectid);
2004 out:
2005         btrfs_release_path(path);
2006         return ret;
2007 }
2008
2009 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2010                                     struct btrfs_root *root,
2011                                     struct btrfs_path *path,
2012                                     struct inode_record *rec)
2013 {
2014         int ret;
2015
2016         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2017         btrfs_release_path(path);
2018         if (!ret)
2019                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2020         return ret;
2021 }
2022
2023 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2024                                struct btrfs_root *root,
2025                                struct btrfs_path *path,
2026                                struct inode_record *rec)
2027 {
2028         struct btrfs_inode_item *ei;
2029         struct btrfs_key key;
2030         int ret = 0;
2031
2032         key.objectid = rec->ino;
2033         key.type = BTRFS_INODE_ITEM_KEY;
2034         key.offset = 0;
2035
2036         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2037         if (ret) {
2038                 if (ret > 0)
2039                         ret = -ENOENT;
2040                 goto out;
2041         }
2042
2043         /* Since ret == 0, no need to check anything */
2044         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2045                             struct btrfs_inode_item);
2046         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2047         btrfs_mark_buffer_dirty(path->nodes[0]);
2048         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2049         printf("reset nbytes for ino %llu root %llu\n",
2050                rec->ino, root->root_key.objectid);
2051 out:
2052         btrfs_release_path(path);
2053         return ret;
2054 }
2055
2056 static int add_missing_dir_index(struct btrfs_root *root,
2057                                  struct cache_tree *inode_cache,
2058                                  struct inode_record *rec,
2059                                  struct inode_backref *backref)
2060 {
2061         struct btrfs_path *path;
2062         struct btrfs_trans_handle *trans;
2063         struct btrfs_dir_item *dir_item;
2064         struct extent_buffer *leaf;
2065         struct btrfs_key key;
2066         struct btrfs_disk_key disk_key;
2067         struct inode_record *dir_rec;
2068         unsigned long name_ptr;
2069         u32 data_size = sizeof(*dir_item) + backref->namelen;
2070         int ret;
2071
2072         path = btrfs_alloc_path();
2073         if (!path)
2074                 return -ENOMEM;
2075
2076         trans = btrfs_start_transaction(root, 1);
2077         if (IS_ERR(trans)) {
2078                 btrfs_free_path(path);
2079                 return PTR_ERR(trans);
2080         }
2081
2082         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2083                 (unsigned long long)rec->ino);
2084         key.objectid = backref->dir;
2085         key.type = BTRFS_DIR_INDEX_KEY;
2086         key.offset = backref->index;
2087
2088         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2089         BUG_ON(ret);
2090
2091         leaf = path->nodes[0];
2092         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2093
2094         disk_key.objectid = cpu_to_le64(rec->ino);
2095         disk_key.type = BTRFS_INODE_ITEM_KEY;
2096         disk_key.offset = 0;
2097
2098         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2099         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2100         btrfs_set_dir_data_len(leaf, dir_item, 0);
2101         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2102         name_ptr = (unsigned long)(dir_item + 1);
2103         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2104         btrfs_mark_buffer_dirty(leaf);
2105         btrfs_free_path(path);
2106         btrfs_commit_transaction(trans, root);
2107
2108         backref->found_dir_index = 1;
2109         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2110         BUG_ON(IS_ERR(dir_rec));
2111         if (!dir_rec)
2112                 return 0;
2113         dir_rec->found_size += backref->namelen;
2114         if (dir_rec->found_size == dir_rec->isize &&
2115             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2116                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2117         if (dir_rec->found_size != dir_rec->isize)
2118                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2119
2120         return 0;
2121 }
2122
2123 static int delete_dir_index(struct btrfs_root *root,
2124                             struct cache_tree *inode_cache,
2125                             struct inode_record *rec,
2126                             struct inode_backref *backref)
2127 {
2128         struct btrfs_trans_handle *trans;
2129         struct btrfs_dir_item *di;
2130         struct btrfs_path *path;
2131         int ret = 0;
2132
2133         path = btrfs_alloc_path();
2134         if (!path)
2135                 return -ENOMEM;
2136
2137         trans = btrfs_start_transaction(root, 1);
2138         if (IS_ERR(trans)) {
2139                 btrfs_free_path(path);
2140                 return PTR_ERR(trans);
2141         }
2142
2143
2144         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2145                 (unsigned long long)backref->dir,
2146                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2147                 (unsigned long long)root->objectid);
2148
2149         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2150                                     backref->name, backref->namelen,
2151                                     backref->index, -1);
2152         if (IS_ERR(di)) {
2153                 ret = PTR_ERR(di);
2154                 btrfs_free_path(path);
2155                 btrfs_commit_transaction(trans, root);
2156                 if (ret == -ENOENT)
2157                         return 0;
2158                 return ret;
2159         }
2160
2161         if (!di)
2162                 ret = btrfs_del_item(trans, root, path);
2163         else
2164                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2165         BUG_ON(ret);
2166         btrfs_free_path(path);
2167         btrfs_commit_transaction(trans, root);
2168         return ret;
2169 }
2170
2171 static int create_inode_item(struct btrfs_root *root,
2172                              struct inode_record *rec,
2173                              struct inode_backref *backref, int root_dir)
2174 {
2175         struct btrfs_trans_handle *trans;
2176         struct btrfs_inode_item inode_item;
2177         time_t now = time(NULL);
2178         int ret;
2179
2180         trans = btrfs_start_transaction(root, 1);
2181         if (IS_ERR(trans)) {
2182                 ret = PTR_ERR(trans);
2183                 return ret;
2184         }
2185
2186         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2187                 "be incomplete, please check permissions and content after "
2188                 "the fsck completes.\n", (unsigned long long)root->objectid,
2189                 (unsigned long long)rec->ino);
2190
2191         memset(&inode_item, 0, sizeof(inode_item));
2192         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2193         if (root_dir)
2194                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2195         else
2196                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2197         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2198         if (rec->found_dir_item) {
2199                 if (rec->found_file_extent)
2200                         fprintf(stderr, "root %llu inode %llu has both a dir "
2201                                 "item and extents, unsure if it is a dir or a "
2202                                 "regular file so setting it as a directory\n",
2203                                 (unsigned long long)root->objectid,
2204                                 (unsigned long long)rec->ino);
2205                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2206                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2207         } else if (!rec->found_dir_item) {
2208                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2209                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2210         }
2211         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2212         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2213         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2214         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2215         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2216         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2217         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2218         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2219
2220         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2221         BUG_ON(ret);
2222         btrfs_commit_transaction(trans, root);
2223         return 0;
2224 }
2225
2226 static int repair_inode_backrefs(struct btrfs_root *root,
2227                                  struct inode_record *rec,
2228                                  struct cache_tree *inode_cache,
2229                                  int delete)
2230 {
2231         struct inode_backref *tmp, *backref;
2232         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2233         int ret = 0;
2234         int repaired = 0;
2235
2236         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2237                 if (!delete && rec->ino == root_dirid) {
2238                         if (!rec->found_inode_item) {
2239                                 ret = create_inode_item(root, rec, backref, 1);
2240                                 if (ret)
2241                                         break;
2242                                 repaired++;
2243                         }
2244                 }
2245
2246                 /* Index 0 for root dir's are special, don't mess with it */
2247                 if (rec->ino == root_dirid && backref->index == 0)
2248                         continue;
2249
2250                 if (delete &&
2251                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2252                      (backref->found_dir_index && backref->found_inode_ref &&
2253                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2254                         ret = delete_dir_index(root, inode_cache, rec, backref);
2255                         if (ret)
2256                                 break;
2257                         repaired++;
2258                         list_del(&backref->list);
2259                         free(backref);
2260                 }
2261
2262                 if (!delete && !backref->found_dir_index &&
2263                     backref->found_dir_item && backref->found_inode_ref) {
2264                         ret = add_missing_dir_index(root, inode_cache, rec,
2265                                                     backref);
2266                         if (ret)
2267                                 break;
2268                         repaired++;
2269                         if (backref->found_dir_item &&
2270                             backref->found_dir_index &&
2271                             backref->found_dir_index) {
2272                                 if (!backref->errors &&
2273                                     backref->found_inode_ref) {
2274                                         list_del(&backref->list);
2275                                         free(backref);
2276                                 }
2277                         }
2278                 }
2279
2280                 if (!delete && (!backref->found_dir_index &&
2281                                 !backref->found_dir_item &&
2282                                 backref->found_inode_ref)) {
2283                         struct btrfs_trans_handle *trans;
2284                         struct btrfs_key location;
2285
2286                         ret = check_dir_conflict(root, backref->name,
2287                                                  backref->namelen,
2288                                                  backref->dir,
2289                                                  backref->index);
2290                         if (ret) {
2291                                 /*
2292                                  * let nlink fixing routine to handle it,
2293                                  * which can do it better.
2294                                  */
2295                                 ret = 0;
2296                                 break;
2297                         }
2298                         location.objectid = rec->ino;
2299                         location.type = BTRFS_INODE_ITEM_KEY;
2300                         location.offset = 0;
2301
2302                         trans = btrfs_start_transaction(root, 1);
2303                         if (IS_ERR(trans)) {
2304                                 ret = PTR_ERR(trans);
2305                                 break;
2306                         }
2307                         fprintf(stderr, "adding missing dir index/item pair "
2308                                 "for inode %llu\n",
2309                                 (unsigned long long)rec->ino);
2310                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2311                                                     backref->namelen,
2312                                                     backref->dir, &location,
2313                                                     imode_to_type(rec->imode),
2314                                                     backref->index);
2315                         BUG_ON(ret);
2316                         btrfs_commit_transaction(trans, root);
2317                         repaired++;
2318                 }
2319
2320                 if (!delete && (backref->found_inode_ref &&
2321                                 backref->found_dir_index &&
2322                                 backref->found_dir_item &&
2323                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2324                                 !rec->found_inode_item)) {
2325                         ret = create_inode_item(root, rec, backref, 0);
2326                         if (ret)
2327                                 break;
2328                         repaired++;
2329                 }
2330
2331         }
2332         return ret ? ret : repaired;
2333 }
2334
2335 /*
2336  * To determine the file type for nlink/inode_item repair
2337  *
2338  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2339  * Return -ENOENT if file type is not found.
2340  */
2341 static int find_file_type(struct inode_record *rec, u8 *type)
2342 {
2343         struct inode_backref *backref;
2344
2345         /* For inode item recovered case */
2346         if (rec->found_inode_item) {
2347                 *type = imode_to_type(rec->imode);
2348                 return 0;
2349         }
2350
2351         list_for_each_entry(backref, &rec->backrefs, list) {
2352                 if (backref->found_dir_index || backref->found_dir_item) {
2353                         *type = backref->filetype;
2354                         return 0;
2355                 }
2356         }
2357         return -ENOENT;
2358 }
2359
2360 /*
2361  * To determine the file name for nlink repair
2362  *
2363  * Return 0 if file name is found, set name and namelen.
2364  * Return -ENOENT if file name is not found.
2365  */
2366 static int find_file_name(struct inode_record *rec,
2367                           char *name, int *namelen)
2368 {
2369         struct inode_backref *backref;
2370
2371         list_for_each_entry(backref, &rec->backrefs, list) {
2372                 if (backref->found_dir_index || backref->found_dir_item ||
2373                     backref->found_inode_ref) {
2374                         memcpy(name, backref->name, backref->namelen);
2375                         *namelen = backref->namelen;
2376                         return 0;
2377                 }
2378         }
2379         return -ENOENT;
2380 }
2381
2382 /* Reset the nlink of the inode to the correct one */
2383 static int reset_nlink(struct btrfs_trans_handle *trans,
2384                        struct btrfs_root *root,
2385                        struct btrfs_path *path,
2386                        struct inode_record *rec)
2387 {
2388         struct inode_backref *backref;
2389         struct inode_backref *tmp;
2390         struct btrfs_key key;
2391         struct btrfs_inode_item *inode_item;
2392         int ret = 0;
2393
2394         /* We don't believe this either, reset it and iterate backref */
2395         rec->found_link = 0;
2396
2397         /* Remove all backref including the valid ones */
2398         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2399                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2400                                    backref->index, backref->name,
2401                                    backref->namelen, 0);
2402                 if (ret < 0)
2403                         goto out;
2404
2405                 /* remove invalid backref, so it won't be added back */
2406                 if (!(backref->found_dir_index &&
2407                       backref->found_dir_item &&
2408                       backref->found_inode_ref)) {
2409                         list_del(&backref->list);
2410                         free(backref);
2411                 } else {
2412                         rec->found_link++;
2413                 }
2414         }
2415
2416         /* Set nlink to 0 */
2417         key.objectid = rec->ino;
2418         key.type = BTRFS_INODE_ITEM_KEY;
2419         key.offset = 0;
2420         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2421         if (ret < 0)
2422                 goto out;
2423         if (ret > 0) {
2424                 ret = -ENOENT;
2425                 goto out;
2426         }
2427         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2428                                     struct btrfs_inode_item);
2429         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2430         btrfs_mark_buffer_dirty(path->nodes[0]);
2431         btrfs_release_path(path);
2432
2433         /*
2434          * Add back valid inode_ref/dir_item/dir_index,
2435          * add_link() will handle the nlink inc, so new nlink must be correct
2436          */
2437         list_for_each_entry(backref, &rec->backrefs, list) {
2438                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2439                                      backref->name, backref->namelen,
2440                                      backref->filetype, &backref->index, 1);
2441                 if (ret < 0)
2442                         goto out;
2443         }
2444 out:
2445         btrfs_release_path(path);
2446         return ret;
2447 }
2448
2449 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2450                                struct btrfs_root *root,
2451                                struct btrfs_path *path,
2452                                struct inode_record *rec)
2453 {
2454         char *dir_name = "lost+found";
2455         char namebuf[BTRFS_NAME_LEN] = {0};
2456         u64 lost_found_ino;
2457         u32 mode = 0700;
2458         u8 type = 0;
2459         int namelen = 0;
2460         int name_recovered = 0;
2461         int type_recovered = 0;
2462         int ret = 0;
2463
2464         /*
2465          * Get file name and type first before these invalid inode ref
2466          * are deleted by remove_all_invalid_backref()
2467          */
2468         name_recovered = !find_file_name(rec, namebuf, &namelen);
2469         type_recovered = !find_file_type(rec, &type);
2470
2471         if (!name_recovered) {
2472                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2473                        rec->ino, rec->ino);
2474                 namelen = count_digits(rec->ino);
2475                 sprintf(namebuf, "%llu", rec->ino);
2476                 name_recovered = 1;
2477         }
2478         if (!type_recovered) {
2479                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2480                        rec->ino);
2481                 type = BTRFS_FT_REG_FILE;
2482                 type_recovered = 1;
2483         }
2484
2485         ret = reset_nlink(trans, root, path, rec);
2486         if (ret < 0) {
2487                 fprintf(stderr,
2488                         "Failed to reset nlink for inode %llu: %s\n",
2489                         rec->ino, strerror(-ret));
2490                 goto out;
2491         }
2492
2493         if (rec->found_link == 0) {
2494                 lost_found_ino = root->highest_inode;
2495                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2496                         ret = -EOVERFLOW;
2497                         goto out;
2498                 }
2499                 lost_found_ino++;
2500                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2501                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2502                                   mode);
2503                 if (ret < 0) {
2504                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2505                                 dir_name, strerror(-ret));
2506                         goto out;
2507                 }
2508                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2509                                      namebuf, namelen, type, NULL, 1);
2510                 /*
2511                  * Add ".INO" suffix several times to handle case where
2512                  * "FILENAME.INO" is already taken by another file.
2513                  */
2514                 while (ret == -EEXIST) {
2515                         /*
2516                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2517                          */
2518                         if (namelen + count_digits(rec->ino) + 1 >
2519                             BTRFS_NAME_LEN) {
2520                                 ret = -EFBIG;
2521                                 goto out;
2522                         }
2523                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2524                                  ".%llu", rec->ino);
2525                         namelen += count_digits(rec->ino) + 1;
2526                         ret = btrfs_add_link(trans, root, rec->ino,
2527                                              lost_found_ino, namebuf,
2528                                              namelen, type, NULL, 1);
2529                 }
2530                 if (ret < 0) {
2531                         fprintf(stderr,
2532                                 "Failed to link the inode %llu to %s dir: %s\n",
2533                                 rec->ino, dir_name, strerror(-ret));
2534                         goto out;
2535                 }
2536                 /*
2537                  * Just increase the found_link, don't actually add the
2538                  * backref. This will make things easier and this inode
2539                  * record will be freed after the repair is done.
2540                  * So fsck will not report problem about this inode.
2541                  */
2542                 rec->found_link++;
2543                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2544                        namelen, namebuf, dir_name);
2545         }
2546         printf("Fixed the nlink of inode %llu\n", rec->ino);
2547 out:
2548         /*
2549          * Clear the flag anyway, or we will loop forever for the same inode
2550          * as it will not be removed from the bad inode list and the dead loop
2551          * happens.
2552          */
2553         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2554         btrfs_release_path(path);
2555         return ret;
2556 }
2557
2558 /*
2559  * Check if there is any normal(reg or prealloc) file extent for given
2560  * ino.
2561  * This is used to determine the file type when neither its dir_index/item or
2562  * inode_item exists.
2563  *
2564  * This will *NOT* report error, if any error happens, just consider it does
2565  * not have any normal file extent.
2566  */
2567 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2568 {
2569         struct btrfs_path *path;
2570         struct btrfs_key key;
2571         struct btrfs_key found_key;
2572         struct btrfs_file_extent_item *fi;
2573         u8 type;
2574         int ret = 0;
2575
2576         path = btrfs_alloc_path();
2577         if (!path)
2578                 goto out;
2579         key.objectid = ino;
2580         key.type = BTRFS_EXTENT_DATA_KEY;
2581         key.offset = 0;
2582
2583         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2584         if (ret < 0) {
2585                 ret = 0;
2586                 goto out;
2587         }
2588         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2589                 ret = btrfs_next_leaf(root, path);
2590                 if (ret) {
2591                         ret = 0;
2592                         goto out;
2593                 }
2594         }
2595         while (1) {
2596                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2597                                       path->slots[0]);
2598                 if (found_key.objectid != ino ||
2599                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2600                         break;
2601                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2602                                     struct btrfs_file_extent_item);
2603                 type = btrfs_file_extent_type(path->nodes[0], fi);
2604                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2605                         ret = 1;
2606                         goto out;
2607                 }
2608         }
2609 out:
2610         btrfs_free_path(path);
2611         return ret;
2612 }
2613
2614 static u32 btrfs_type_to_imode(u8 type)
2615 {
2616         static u32 imode_by_btrfs_type[] = {
2617                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2618                 [BTRFS_FT_DIR]          = S_IFDIR,
2619                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2620                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2621                 [BTRFS_FT_FIFO]         = S_IFIFO,
2622                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2623                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2624         };
2625
2626         return imode_by_btrfs_type[(type)];
2627 }
2628
2629 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2630                                 struct btrfs_root *root,
2631                                 struct btrfs_path *path,
2632                                 struct inode_record *rec)
2633 {
2634         u8 filetype;
2635         u32 mode = 0700;
2636         int type_recovered = 0;
2637         int ret = 0;
2638
2639         printf("Trying to rebuild inode:%llu\n", rec->ino);
2640
2641         type_recovered = !find_file_type(rec, &filetype);
2642
2643         /*
2644          * Try to determine inode type if type not found.
2645          *
2646          * For found regular file extent, it must be FILE.
2647          * For found dir_item/index, it must be DIR.
2648          *
2649          * For undetermined one, use FILE as fallback.
2650          *
2651          * TODO:
2652          * 1. If found backref(inode_index/item is already handled) to it,
2653          *    it must be DIR.
2654          *    Need new inode-inode ref structure to allow search for that.
2655          */
2656         if (!type_recovered) {
2657                 if (rec->found_file_extent &&
2658                     find_normal_file_extent(root, rec->ino)) {
2659                         type_recovered = 1;
2660                         filetype = BTRFS_FT_REG_FILE;
2661                 } else if (rec->found_dir_item) {
2662                         type_recovered = 1;
2663                         filetype = BTRFS_FT_DIR;
2664                 } else if (!list_empty(&rec->orphan_extents)) {
2665                         type_recovered = 1;
2666                         filetype = BTRFS_FT_REG_FILE;
2667                 } else{
2668                         printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2669                                rec->ino);
2670                         type_recovered = 1;
2671                         filetype = BTRFS_FT_REG_FILE;
2672                 }
2673         }
2674
2675         ret = btrfs_new_inode(trans, root, rec->ino,
2676                               mode | btrfs_type_to_imode(filetype));
2677         if (ret < 0)
2678                 goto out;
2679
2680         /*
2681          * Here inode rebuild is done, we only rebuild the inode item,
2682          * don't repair the nlink(like move to lost+found).
2683          * That is the job of nlink repair.
2684          *
2685          * We just fill the record and return
2686          */
2687         rec->found_dir_item = 1;
2688         rec->imode = mode | btrfs_type_to_imode(filetype);
2689         rec->nlink = 0;
2690         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2691         /* Ensure the inode_nlinks repair function will be called */
2692         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2693 out:
2694         return ret;
2695 }
2696
2697 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2698                                       struct btrfs_root *root,
2699                                       struct btrfs_path *path,
2700                                       struct inode_record *rec)
2701 {
2702         struct orphan_data_extent *orphan;
2703         struct orphan_data_extent *tmp;
2704         int ret = 0;
2705
2706         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2707                 /*
2708                  * Check for conflicting file extents
2709                  *
2710                  * Here we don't know whether the extents is compressed or not,
2711                  * so we can only assume it not compressed nor data offset,
2712                  * and use its disk_len as extent length.
2713                  */
2714                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2715                                        orphan->offset, orphan->disk_len, 0);
2716                 btrfs_release_path(path);
2717                 if (ret < 0)
2718                         goto out;
2719                 if (!ret) {
2720                         fprintf(stderr,
2721                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2722                                 orphan->disk_bytenr, orphan->disk_len);
2723                         ret = btrfs_free_extent(trans,
2724                                         root->fs_info->extent_root,
2725                                         orphan->disk_bytenr, orphan->disk_len,
2726                                         0, root->objectid, orphan->objectid,
2727                                         orphan->offset);
2728                         if (ret < 0)
2729                                 goto out;
2730                 }
2731                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2732                                 orphan->offset, orphan->disk_bytenr,
2733                                 orphan->disk_len, orphan->disk_len);
2734                 if (ret < 0)
2735                         goto out;
2736
2737                 /* Update file size info */
2738                 rec->found_size += orphan->disk_len;
2739                 if (rec->found_size == rec->nbytes)
2740                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2741
2742                 /* Update the file extent hole info too */
2743                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2744                                            orphan->disk_len);
2745                 if (ret < 0)
2746                         goto out;
2747                 if (RB_EMPTY_ROOT(&rec->holes))
2748                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2749
2750                 list_del(&orphan->list);
2751                 free(orphan);
2752         }
2753         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2754 out:
2755         return ret;
2756 }
2757
2758 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2759                                         struct btrfs_root *root,
2760                                         struct btrfs_path *path,
2761                                         struct inode_record *rec)
2762 {
2763         struct rb_node *node;
2764         struct file_extent_hole *hole;
2765         int found = 0;
2766         int ret = 0;
2767
2768         node = rb_first(&rec->holes);
2769
2770         while (node) {
2771                 found = 1;
2772                 hole = rb_entry(node, struct file_extent_hole, node);
2773                 ret = btrfs_punch_hole(trans, root, rec->ino,
2774                                        hole->start, hole->len);
2775                 if (ret < 0)
2776                         goto out;
2777                 ret = del_file_extent_hole(&rec->holes, hole->start,
2778                                            hole->len);
2779                 if (ret < 0)
2780                         goto out;
2781                 if (RB_EMPTY_ROOT(&rec->holes))
2782                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2783                 node = rb_first(&rec->holes);
2784         }
2785         /* special case for a file losing all its file extent */
2786         if (!found) {
2787                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2788                                        round_up(rec->isize, root->sectorsize));
2789                 if (ret < 0)
2790                         goto out;
2791         }
2792         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2793                rec->ino, root->objectid);
2794 out:
2795         return ret;
2796 }
2797
2798 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2799 {
2800         struct btrfs_trans_handle *trans;
2801         struct btrfs_path *path;
2802         int ret = 0;
2803
2804         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2805                              I_ERR_NO_ORPHAN_ITEM |
2806                              I_ERR_LINK_COUNT_WRONG |
2807                              I_ERR_NO_INODE_ITEM |
2808                              I_ERR_FILE_EXTENT_ORPHAN |
2809                              I_ERR_FILE_EXTENT_DISCOUNT|
2810                              I_ERR_FILE_NBYTES_WRONG)))
2811                 return rec->errors;
2812
2813         path = btrfs_alloc_path();
2814         if (!path)
2815                 return -ENOMEM;
2816
2817         /*
2818          * For nlink repair, it may create a dir and add link, so
2819          * 2 for parent(256)'s dir_index and dir_item
2820          * 2 for lost+found dir's inode_item and inode_ref
2821          * 1 for the new inode_ref of the file
2822          * 2 for lost+found dir's dir_index and dir_item for the file
2823          */
2824         trans = btrfs_start_transaction(root, 7);
2825         if (IS_ERR(trans)) {
2826                 btrfs_free_path(path);
2827                 return PTR_ERR(trans);
2828         }
2829
2830         if (rec->errors & I_ERR_NO_INODE_ITEM)
2831                 ret = repair_inode_no_item(trans, root, path, rec);
2832         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2833                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2834         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2835                 ret = repair_inode_discount_extent(trans, root, path, rec);
2836         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2837                 ret = repair_inode_isize(trans, root, path, rec);
2838         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2839                 ret = repair_inode_orphan_item(trans, root, path, rec);
2840         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2841                 ret = repair_inode_nlinks(trans, root, path, rec);
2842         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2843                 ret = repair_inode_nbytes(trans, root, path, rec);
2844         btrfs_commit_transaction(trans, root);
2845         btrfs_free_path(path);
2846         return ret;
2847 }
2848
2849 static int check_inode_recs(struct btrfs_root *root,
2850                             struct cache_tree *inode_cache)
2851 {
2852         struct cache_extent *cache;
2853         struct ptr_node *node;
2854         struct inode_record *rec;
2855         struct inode_backref *backref;
2856         int stage = 0;
2857         int ret = 0;
2858         int err = 0;
2859         u64 error = 0;
2860         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2861
2862         if (btrfs_root_refs(&root->root_item) == 0) {
2863                 if (!cache_tree_empty(inode_cache))
2864                         fprintf(stderr, "warning line %d\n", __LINE__);
2865                 return 0;
2866         }
2867
2868         /*
2869          * We need to record the highest inode number for later 'lost+found'
2870          * dir creation.
2871          * We must select a ino not used/refered by any existing inode, or
2872          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2873          * this may cause 'lost+found' dir has wrong nlinks.
2874          */
2875         cache = last_cache_extent(inode_cache);
2876         if (cache) {
2877                 node = container_of(cache, struct ptr_node, cache);
2878                 rec = node->data;
2879                 if (rec->ino > root->highest_inode)
2880                         root->highest_inode = rec->ino;
2881         }
2882
2883         /*
2884          * We need to repair backrefs first because we could change some of the
2885          * errors in the inode recs.
2886          *
2887          * We also need to go through and delete invalid backrefs first and then
2888          * add the correct ones second.  We do this because we may get EEXIST
2889          * when adding back the correct index because we hadn't yet deleted the
2890          * invalid index.
2891          *
2892          * For example, if we were missing a dir index then the directories
2893          * isize would be wrong, so if we fixed the isize to what we thought it
2894          * would be and then fixed the backref we'd still have a invalid fs, so
2895          * we need to add back the dir index and then check to see if the isize
2896          * is still wrong.
2897          */
2898         while (stage < 3) {
2899                 stage++;
2900                 if (stage == 3 && !err)
2901                         break;
2902
2903                 cache = search_cache_extent(inode_cache, 0);
2904                 while (repair && cache) {
2905                         node = container_of(cache, struct ptr_node, cache);
2906                         rec = node->data;
2907                         cache = next_cache_extent(cache);
2908
2909                         /* Need to free everything up and rescan */
2910                         if (stage == 3) {
2911                                 remove_cache_extent(inode_cache, &node->cache);
2912                                 free(node);
2913                                 free_inode_rec(rec);
2914                                 continue;
2915                         }
2916
2917                         if (list_empty(&rec->backrefs))
2918                                 continue;
2919
2920                         ret = repair_inode_backrefs(root, rec, inode_cache,
2921                                                     stage == 1);
2922                         if (ret < 0) {
2923                                 err = ret;
2924                                 stage = 2;
2925                                 break;
2926                         } if (ret > 0) {
2927                                 err = -EAGAIN;
2928                         }
2929                 }
2930         }
2931         if (err)
2932                 return err;
2933
2934         rec = get_inode_rec(inode_cache, root_dirid, 0);
2935         BUG_ON(IS_ERR(rec));
2936         if (rec) {
2937                 ret = check_root_dir(rec);
2938                 if (ret) {
2939                         fprintf(stderr, "root %llu root dir %llu error\n",
2940                                 (unsigned long long)root->root_key.objectid,
2941                                 (unsigned long long)root_dirid);
2942                         print_inode_error(root, rec);
2943                         error++;
2944                 }
2945         } else {
2946                 if (repair) {
2947                         struct btrfs_trans_handle *trans;
2948
2949                         trans = btrfs_start_transaction(root, 1);
2950                         if (IS_ERR(trans)) {
2951                                 err = PTR_ERR(trans);
2952                                 return err;
2953                         }
2954
2955                         fprintf(stderr,
2956                                 "root %llu missing its root dir, recreating\n",
2957                                 (unsigned long long)root->objectid);
2958
2959                         ret = btrfs_make_root_dir(trans, root, root_dirid);
2960                         BUG_ON(ret);
2961
2962                         btrfs_commit_transaction(trans, root);
2963                         return -EAGAIN;
2964                 }
2965
2966                 fprintf(stderr, "root %llu root dir %llu not found\n",
2967                         (unsigned long long)root->root_key.objectid,
2968                         (unsigned long long)root_dirid);
2969         }
2970
2971         while (1) {
2972                 cache = search_cache_extent(inode_cache, 0);
2973                 if (!cache)
2974                         break;
2975                 node = container_of(cache, struct ptr_node, cache);
2976                 rec = node->data;
2977                 remove_cache_extent(inode_cache, &node->cache);
2978                 free(node);
2979                 if (rec->ino == root_dirid ||
2980                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
2981                         free_inode_rec(rec);
2982                         continue;
2983                 }
2984
2985                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2986                         ret = check_orphan_item(root, rec->ino);
2987                         if (ret == 0)
2988                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2989                         if (can_free_inode_rec(rec)) {
2990                                 free_inode_rec(rec);
2991                                 continue;
2992                         }
2993                 }
2994
2995                 if (!rec->found_inode_item)
2996                         rec->errors |= I_ERR_NO_INODE_ITEM;
2997                 if (rec->found_link != rec->nlink)
2998                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2999                 if (repair) {
3000                         ret = try_repair_inode(root, rec);
3001                         if (ret == 0 && can_free_inode_rec(rec)) {
3002                                 free_inode_rec(rec);
3003                                 continue;
3004                         }
3005                         ret = 0;
3006                 }
3007
3008                 if (!(repair && ret == 0))
3009                         error++;
3010                 print_inode_error(root, rec);
3011                 list_for_each_entry(backref, &rec->backrefs, list) {
3012                         if (!backref->found_dir_item)
3013                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3014                         if (!backref->found_dir_index)
3015                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3016                         if (!backref->found_inode_ref)
3017                                 backref->errors |= REF_ERR_NO_INODE_REF;
3018                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3019                                 " namelen %u name %s filetype %d errors %x",
3020                                 (unsigned long long)backref->dir,
3021                                 (unsigned long long)backref->index,
3022                                 backref->namelen, backref->name,
3023                                 backref->filetype, backref->errors);
3024                         print_ref_error(backref->errors);
3025                 }
3026                 free_inode_rec(rec);
3027         }
3028         return (error > 0) ? -1 : 0;
3029 }
3030
3031 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3032                                         u64 objectid)
3033 {
3034         struct cache_extent *cache;
3035         struct root_record *rec = NULL;
3036         int ret;
3037
3038         cache = lookup_cache_extent(root_cache, objectid, 1);
3039         if (cache) {
3040                 rec = container_of(cache, struct root_record, cache);
3041         } else {
3042                 rec = calloc(1, sizeof(*rec));
3043                 if (!rec)
3044                         return ERR_PTR(-ENOMEM);
3045                 rec->objectid = objectid;
3046                 INIT_LIST_HEAD(&rec->backrefs);
3047                 rec->cache.start = objectid;
3048                 rec->cache.size = 1;
3049
3050                 ret = insert_cache_extent(root_cache, &rec->cache);
3051                 if (ret)
3052                         return ERR_PTR(-EEXIST);
3053         }
3054         return rec;
3055 }
3056
3057 static struct root_backref *get_root_backref(struct root_record *rec,
3058                                              u64 ref_root, u64 dir, u64 index,
3059                                              const char *name, int namelen)
3060 {
3061         struct root_backref *backref;
3062
3063         list_for_each_entry(backref, &rec->backrefs, list) {
3064                 if (backref->ref_root != ref_root || backref->dir != dir ||
3065                     backref->namelen != namelen)
3066                         continue;
3067                 if (memcmp(name, backref->name, namelen))
3068                         continue;
3069                 return backref;
3070         }
3071
3072         backref = calloc(1, sizeof(*backref) + namelen + 1);
3073         backref->ref_root = ref_root;
3074         backref->dir = dir;
3075         backref->index = index;
3076         backref->namelen = namelen;
3077         memcpy(backref->name, name, namelen);
3078         backref->name[namelen] = '\0';
3079         list_add_tail(&backref->list, &rec->backrefs);
3080         return backref;
3081 }
3082
3083 static void free_root_record(struct cache_extent *cache)
3084 {
3085         struct root_record *rec;
3086         struct root_backref *backref;
3087
3088         rec = container_of(cache, struct root_record, cache);
3089         while (!list_empty(&rec->backrefs)) {
3090                 backref = list_entry(rec->backrefs.next,
3091                                      struct root_backref, list);
3092                 list_del(&backref->list);
3093                 free(backref);
3094         }
3095
3096         kfree(rec);
3097 }
3098
3099 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3100
3101 static int add_root_backref(struct cache_tree *root_cache,
3102                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3103                             const char *name, int namelen,
3104                             int item_type, int errors)
3105 {
3106         struct root_record *rec;
3107         struct root_backref *backref;
3108
3109         rec = get_root_rec(root_cache, root_id);
3110         BUG_ON(IS_ERR(rec));
3111         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3112
3113         backref->errors |= errors;
3114
3115         if (item_type != BTRFS_DIR_ITEM_KEY) {
3116                 if (backref->found_dir_index || backref->found_back_ref ||
3117                     backref->found_forward_ref) {
3118                         if (backref->index != index)
3119                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3120                 } else {
3121                         backref->index = index;
3122                 }
3123         }
3124
3125         if (item_type == BTRFS_DIR_ITEM_KEY) {
3126                 if (backref->found_forward_ref)
3127                         rec->found_ref++;
3128                 backref->found_dir_item = 1;
3129         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3130                 backref->found_dir_index = 1;
3131         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3132                 if (backref->found_forward_ref)
3133                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3134                 else if (backref->found_dir_item)
3135                         rec->found_ref++;
3136                 backref->found_forward_ref = 1;
3137         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3138                 if (backref->found_back_ref)
3139                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3140                 backref->found_back_ref = 1;
3141         } else {
3142                 BUG_ON(1);
3143         }
3144
3145         if (backref->found_forward_ref && backref->found_dir_item)
3146                 backref->reachable = 1;
3147         return 0;
3148 }
3149
3150 static int merge_root_recs(struct btrfs_root *root,
3151                            struct cache_tree *src_cache,
3152                            struct cache_tree *dst_cache)
3153 {
3154         struct cache_extent *cache;
3155         struct ptr_node *node;
3156         struct inode_record *rec;
3157         struct inode_backref *backref;
3158         int ret = 0;
3159
3160         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3161                 free_inode_recs_tree(src_cache);
3162                 return 0;
3163         }
3164
3165         while (1) {
3166                 cache = search_cache_extent(src_cache, 0);
3167                 if (!cache)
3168                         break;
3169                 node = container_of(cache, struct ptr_node, cache);
3170                 rec = node->data;
3171                 remove_cache_extent(src_cache, &node->cache);
3172                 free(node);
3173
3174                 ret = is_child_root(root, root->objectid, rec->ino);
3175                 if (ret < 0)
3176                         break;
3177                 else if (ret == 0)
3178                         goto skip;
3179
3180                 list_for_each_entry(backref, &rec->backrefs, list) {
3181                         BUG_ON(backref->found_inode_ref);
3182                         if (backref->found_dir_item)
3183                                 add_root_backref(dst_cache, rec->ino,
3184                                         root->root_key.objectid, backref->dir,
3185                                         backref->index, backref->name,
3186                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3187                                         backref->errors);
3188                         if (backref->found_dir_index)
3189                                 add_root_backref(dst_cache, rec->ino,
3190                                         root->root_key.objectid, backref->dir,
3191                                         backref->index, backref->name,
3192                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3193                                         backref->errors);
3194                 }
3195 skip:
3196                 free_inode_rec(rec);
3197         }
3198         if (ret < 0)
3199                 return ret;
3200         return 0;
3201 }
3202
3203 static int check_root_refs(struct btrfs_root *root,
3204                            struct cache_tree *root_cache)
3205 {
3206         struct root_record *rec;
3207         struct root_record *ref_root;
3208         struct root_backref *backref;
3209         struct cache_extent *cache;
3210         int loop = 1;
3211         int ret;
3212         int error;
3213         int errors = 0;
3214
3215         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3216         BUG_ON(IS_ERR(rec));
3217         rec->found_ref = 1;
3218
3219         /* fixme: this can not detect circular references */
3220         while (loop) {
3221                 loop = 0;
3222                 cache = search_cache_extent(root_cache, 0);
3223                 while (1) {
3224                         if (!cache)
3225                                 break;
3226                         rec = container_of(cache, struct root_record, cache);
3227                         cache = next_cache_extent(cache);
3228
3229                         if (rec->found_ref == 0)
3230                                 continue;
3231
3232                         list_for_each_entry(backref, &rec->backrefs, list) {
3233                                 if (!backref->reachable)
3234                                         continue;
3235
3236                                 ref_root = get_root_rec(root_cache,
3237                                                         backref->ref_root);
3238                                 BUG_ON(IS_ERR(ref_root));
3239                                 if (ref_root->found_ref > 0)
3240                                         continue;
3241
3242                                 backref->reachable = 0;
3243                                 rec->found_ref--;
3244                                 if (rec->found_ref == 0)
3245                                         loop = 1;
3246                         }
3247                 }
3248         }
3249
3250         cache = search_cache_extent(root_cache, 0);
3251         while (1) {
3252                 if (!cache)
3253                         break;
3254                 rec = container_of(cache, struct root_record, cache);
3255                 cache = next_cache_extent(cache);
3256
3257                 if (rec->found_ref == 0 &&
3258                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3259                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3260                         ret = check_orphan_item(root->fs_info->tree_root,
3261                                                 rec->objectid);
3262                         if (ret == 0)
3263                                 continue;
3264
3265                         /*
3266                          * If we don't have a root item then we likely just have
3267                          * a dir item in a snapshot for this root but no actual
3268                          * ref key or anything so it's meaningless.
3269                          */
3270                         if (!rec->found_root_item)
3271                                 continue;
3272                         errors++;
3273                         fprintf(stderr, "fs tree %llu not referenced\n",
3274                                 (unsigned long long)rec->objectid);
3275                 }
3276
3277                 error = 0;
3278                 if (rec->found_ref > 0 && !rec->found_root_item)
3279                         error = 1;
3280                 list_for_each_entry(backref, &rec->backrefs, list) {
3281                         if (!backref->found_dir_item)
3282                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3283                         if (!backref->found_dir_index)
3284                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3285                         if (!backref->found_back_ref)
3286                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3287                         if (!backref->found_forward_ref)
3288                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3289                         if (backref->reachable && backref->errors)
3290                                 error = 1;
3291                 }
3292                 if (!error)
3293                         continue;
3294
3295                 errors++;
3296                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3297                         (unsigned long long)rec->objectid, rec->found_ref,
3298                          rec->found_root_item ? "" : "not found");
3299
3300                 list_for_each_entry(backref, &rec->backrefs, list) {
3301                         if (!backref->reachable)
3302                                 continue;
3303                         if (!backref->errors && rec->found_root_item)
3304                                 continue;
3305                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3306                                 " index %llu namelen %u name %s errors %x\n",
3307                                 (unsigned long long)backref->ref_root,
3308                                 (unsigned long long)backref->dir,
3309                                 (unsigned long long)backref->index,
3310                                 backref->namelen, backref->name,
3311                                 backref->errors);
3312                         print_ref_error(backref->errors);
3313                 }
3314         }
3315         return errors > 0 ? 1 : 0;
3316 }
3317
3318 static int process_root_ref(struct extent_buffer *eb, int slot,
3319                             struct btrfs_key *key,
3320                             struct cache_tree *root_cache)
3321 {
3322         u64 dirid;
3323         u64 index;
3324         u32 len;
3325         u32 name_len;
3326         struct btrfs_root_ref *ref;
3327         char namebuf[BTRFS_NAME_LEN];
3328         int error;
3329
3330         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3331
3332         dirid = btrfs_root_ref_dirid(eb, ref);
3333         index = btrfs_root_ref_sequence(eb, ref);
3334         name_len = btrfs_root_ref_name_len(eb, ref);
3335
3336         if (name_len <= BTRFS_NAME_LEN) {
3337                 len = name_len;
3338                 error = 0;
3339         } else {
3340                 len = BTRFS_NAME_LEN;
3341                 error = REF_ERR_NAME_TOO_LONG;
3342         }
3343         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3344
3345         if (key->type == BTRFS_ROOT_REF_KEY) {
3346                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3347                                  index, namebuf, len, key->type, error);
3348         } else {
3349                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3350                                  index, namebuf, len, key->type, error);
3351         }
3352         return 0;
3353 }
3354
3355 static void free_corrupt_block(struct cache_extent *cache)
3356 {
3357         struct btrfs_corrupt_block *corrupt;
3358
3359         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3360         free(corrupt);
3361 }
3362
3363 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3364
3365 /*
3366  * Repair the btree of the given root.
3367  *
3368  * The fix is to remove the node key in corrupt_blocks cache_tree.
3369  * and rebalance the tree.
3370  * After the fix, the btree should be writeable.
3371  */
3372 static int repair_btree(struct btrfs_root *root,
3373                         struct cache_tree *corrupt_blocks)
3374 {
3375         struct btrfs_trans_handle *trans;
3376         struct btrfs_path *path;
3377         struct btrfs_corrupt_block *corrupt;
3378         struct cache_extent *cache;
3379         struct btrfs_key key;
3380         u64 offset;
3381         int level;
3382         int ret = 0;
3383
3384         if (cache_tree_empty(corrupt_blocks))
3385                 return 0;
3386
3387         path = btrfs_alloc_path();
3388         if (!path)
3389                 return -ENOMEM;
3390
3391         trans = btrfs_start_transaction(root, 1);
3392         if (IS_ERR(trans)) {
3393                 ret = PTR_ERR(trans);
3394                 fprintf(stderr, "Error starting transaction: %s\n",
3395                         strerror(-ret));
3396                 goto out_free_path;
3397         }
3398         cache = first_cache_extent(corrupt_blocks);
3399         while (cache) {
3400                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3401                                        cache);
3402                 level = corrupt->level;
3403                 path->lowest_level = level;
3404                 key.objectid = corrupt->key.objectid;
3405                 key.type = corrupt->key.type;
3406                 key.offset = corrupt->key.offset;
3407
3408                 /*
3409                  * Here we don't want to do any tree balance, since it may
3410                  * cause a balance with corrupted brother leaf/node,
3411                  * so ins_len set to 0 here.
3412                  * Balance will be done after all corrupt node/leaf is deleted.
3413                  */
3414                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3415                 if (ret < 0)
3416                         goto out;
3417                 offset = btrfs_node_blockptr(path->nodes[level],
3418                                              path->slots[level]);
3419
3420                 /* Remove the ptr */
3421                 ret = btrfs_del_ptr(trans, root, path, level,
3422                                     path->slots[level]);
3423                 if (ret < 0)
3424                         goto out;
3425                 /*
3426                  * Remove the corresponding extent
3427                  * return value is not concerned.
3428                  */
3429                 btrfs_release_path(path);
3430                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3431                                         0, root->root_key.objectid,
3432                                         level - 1, 0);
3433                 cache = next_cache_extent(cache);
3434         }
3435
3436         /* Balance the btree using btrfs_search_slot() */
3437         cache = first_cache_extent(corrupt_blocks);
3438         while (cache) {
3439                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3440                                        cache);
3441                 memcpy(&key, &corrupt->key, sizeof(key));
3442                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3443                 if (ret < 0)
3444                         goto out;
3445                 /* return will always >0 since it won't find the item */
3446                 ret = 0;
3447                 btrfs_release_path(path);
3448                 cache = next_cache_extent(cache);
3449         }
3450 out:
3451         btrfs_commit_transaction(trans, root);
3452 out_free_path:
3453         btrfs_free_path(path);
3454         return ret;
3455 }
3456
3457 static int check_fs_root(struct btrfs_root *root,
3458                          struct cache_tree *root_cache,
3459                          struct walk_control *wc)
3460 {
3461         int ret = 0;
3462         int err = 0;
3463         int wret;
3464         int level;
3465         struct btrfs_path path;
3466         struct shared_node root_node;
3467         struct root_record *rec;
3468         struct btrfs_root_item *root_item = &root->root_item;
3469         struct cache_tree corrupt_blocks;
3470         struct orphan_data_extent *orphan;
3471         struct orphan_data_extent *tmp;
3472         enum btrfs_tree_block_status status;
3473
3474         /*
3475          * Reuse the corrupt_block cache tree to record corrupted tree block
3476          *
3477          * Unlike the usage in extent tree check, here we do it in a per
3478          * fs/subvol tree base.
3479          */
3480         cache_tree_init(&corrupt_blocks);
3481         root->fs_info->corrupt_blocks = &corrupt_blocks;
3482
3483         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3484                 rec = get_root_rec(root_cache, root->root_key.objectid);
3485                 BUG_ON(IS_ERR(rec));
3486                 if (btrfs_root_refs(root_item) > 0)
3487                         rec->found_root_item = 1;
3488         }
3489
3490         btrfs_init_path(&path);
3491         memset(&root_node, 0, sizeof(root_node));
3492         cache_tree_init(&root_node.root_cache);
3493         cache_tree_init(&root_node.inode_cache);
3494
3495         /* Move the orphan extent record to corresponding inode_record */
3496         list_for_each_entry_safe(orphan, tmp,
3497                                  &root->orphan_data_extents, list) {
3498                 struct inode_record *inode;
3499
3500                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3501                                       1);
3502                 BUG_ON(IS_ERR(inode));
3503                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3504                 list_move(&orphan->list, &inode->orphan_extents);
3505         }
3506
3507         level = btrfs_header_level(root->node);
3508         memset(wc->nodes, 0, sizeof(wc->nodes));
3509         wc->nodes[level] = &root_node;
3510         wc->active_node = level;
3511         wc->root_level = level;
3512
3513         /* We may not have checked the root block, lets do that now */
3514         if (btrfs_is_leaf(root->node))
3515                 status = btrfs_check_leaf(root, NULL, root->node);
3516         else
3517                 status = btrfs_check_node(root, NULL, root->node);
3518         if (status != BTRFS_TREE_BLOCK_CLEAN)
3519                 return -EIO;
3520
3521         if (btrfs_root_refs(root_item) > 0 ||
3522             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3523                 path.nodes[level] = root->node;
3524                 extent_buffer_get(root->node);
3525                 path.slots[level] = 0;
3526         } else {
3527                 struct btrfs_key key;
3528                 struct btrfs_disk_key found_key;
3529
3530                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3531                 level = root_item->drop_level;
3532                 path.lowest_level = level;
3533                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3534                 if (wret < 0)
3535                         goto skip_walking;
3536                 btrfs_node_key(path.nodes[level], &found_key,
3537                                 path.slots[level]);
3538                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3539                                         sizeof(found_key)));
3540         }
3541
3542         while (1) {
3543                 wret = walk_down_tree(root, &path, wc, &level);
3544                 if (wret < 0)
3545                         ret = wret;
3546                 if (wret != 0)
3547                         break;
3548
3549                 wret = walk_up_tree(root, &path, wc, &level);
3550                 if (wret < 0)
3551                         ret = wret;
3552                 if (wret != 0)
3553                         break;
3554         }
3555 skip_walking:
3556         btrfs_release_path(&path);
3557
3558         if (!cache_tree_empty(&corrupt_blocks)) {
3559                 struct cache_extent *cache;
3560                 struct btrfs_corrupt_block *corrupt;
3561
3562                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3563                        root->root_key.objectid);
3564                 cache = first_cache_extent(&corrupt_blocks);
3565                 while (cache) {
3566                         corrupt = container_of(cache,
3567                                                struct btrfs_corrupt_block,
3568                                                cache);
3569                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3570                                cache->start, corrupt->level,
3571                                corrupt->key.objectid, corrupt->key.type,
3572                                corrupt->key.offset);
3573                         cache = next_cache_extent(cache);
3574                 }
3575                 if (repair) {
3576                         printf("Try to repair the btree for root %llu\n",
3577                                root->root_key.objectid);
3578                         ret = repair_btree(root, &corrupt_blocks);
3579                         if (ret < 0)
3580                                 fprintf(stderr, "Failed to repair btree: %s\n",
3581                                         strerror(-ret));
3582                         if (!ret)
3583                                 printf("Btree for root %llu is fixed\n",
3584                                        root->root_key.objectid);
3585                 }
3586         }
3587
3588         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3589         if (err < 0)
3590                 ret = err;
3591
3592         if (root_node.current) {
3593                 root_node.current->checked = 1;
3594                 maybe_free_inode_rec(&root_node.inode_cache,
3595                                 root_node.current);
3596         }
3597
3598         err = check_inode_recs(root, &root_node.inode_cache);
3599         if (!ret)
3600                 ret = err;
3601
3602         free_corrupt_blocks_tree(&corrupt_blocks);
3603         root->fs_info->corrupt_blocks = NULL;
3604         free_orphan_data_extents(&root->orphan_data_extents);
3605         return ret;
3606 }
3607
3608 static int fs_root_objectid(u64 objectid)
3609 {
3610         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3611             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3612                 return 1;
3613         return is_fstree(objectid);
3614 }
3615
3616 static int check_fs_roots(struct btrfs_root *root,
3617                           struct cache_tree *root_cache)
3618 {
3619         struct btrfs_path path;
3620         struct btrfs_key key;
3621         struct walk_control wc;
3622         struct extent_buffer *leaf, *tree_node;
3623         struct btrfs_root *tmp_root;
3624         struct btrfs_root *tree_root = root->fs_info->tree_root;
3625         int ret;
3626         int err = 0;
3627
3628         if (ctx.progress_enabled) {
3629                 ctx.tp = TASK_FS_ROOTS;
3630                 task_start(ctx.info);
3631         }
3632
3633         /*
3634          * Just in case we made any changes to the extent tree that weren't
3635          * reflected into the free space cache yet.
3636          */
3637         if (repair)
3638                 reset_cached_block_groups(root->fs_info);
3639         memset(&wc, 0, sizeof(wc));
3640         cache_tree_init(&wc.shared);
3641         btrfs_init_path(&path);
3642
3643 again:
3644         key.offset = 0;
3645         key.objectid = 0;
3646         key.type = BTRFS_ROOT_ITEM_KEY;
3647         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3648         if (ret < 0) {
3649                 err = 1;
3650                 goto out;
3651         }
3652         tree_node = tree_root->node;
3653         while (1) {
3654                 if (tree_node != tree_root->node) {
3655                         free_root_recs_tree(root_cache);
3656                         btrfs_release_path(&path);
3657                         goto again;
3658                 }
3659                 leaf = path.nodes[0];
3660                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3661                         ret = btrfs_next_leaf(tree_root, &path);
3662                         if (ret) {
3663                                 if (ret < 0)
3664                                         err = 1;
3665                                 break;
3666                         }
3667                         leaf = path.nodes[0];
3668                 }
3669                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3670                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3671                     fs_root_objectid(key.objectid)) {
3672                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3673                                 tmp_root = btrfs_read_fs_root_no_cache(
3674                                                 root->fs_info, &key);
3675                         } else {
3676                                 key.offset = (u64)-1;
3677                                 tmp_root = btrfs_read_fs_root(
3678                                                 root->fs_info, &key);
3679                         }
3680                         if (IS_ERR(tmp_root)) {
3681                                 err = 1;
3682                                 goto next;
3683                         }
3684                         ret = check_fs_root(tmp_root, root_cache, &wc);
3685                         if (ret == -EAGAIN) {
3686                                 free_root_recs_tree(root_cache);
3687                                 btrfs_release_path(&path);
3688                                 goto again;
3689                         }
3690                         if (ret)
3691                                 err = 1;
3692                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3693                                 btrfs_free_fs_root(tmp_root);
3694                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3695                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3696                         process_root_ref(leaf, path.slots[0], &key,
3697                                          root_cache);
3698                 }
3699 next:
3700                 path.slots[0]++;
3701         }
3702 out:
3703         btrfs_release_path(&path);
3704         if (err)
3705                 free_extent_cache_tree(&wc.shared);
3706         if (!cache_tree_empty(&wc.shared))
3707                 fprintf(stderr, "warning line %d\n", __LINE__);
3708
3709         task_stop(ctx.info);
3710
3711         return err;
3712 }
3713
3714 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3715 {
3716         struct list_head *cur = rec->backrefs.next;
3717         struct extent_backref *back;
3718         struct tree_backref *tback;
3719         struct data_backref *dback;
3720         u64 found = 0;
3721         int err = 0;
3722
3723         while(cur != &rec->backrefs) {
3724                 back = list_entry(cur, struct extent_backref, list);
3725                 cur = cur->next;
3726                 if (!back->found_extent_tree) {
3727                         err = 1;
3728                         if (!print_errs)
3729                                 goto out;
3730                         if (back->is_data) {
3731                                 dback = (struct data_backref *)back;
3732                                 fprintf(stderr, "Backref %llu %s %llu"
3733                                         " owner %llu offset %llu num_refs %lu"
3734                                         " not found in extent tree\n",
3735                                         (unsigned long long)rec->start,
3736                                         back->full_backref ?
3737                                         "parent" : "root",
3738                                         back->full_backref ?
3739                                         (unsigned long long)dback->parent:
3740                                         (unsigned long long)dback->root,
3741                                         (unsigned long long)dback->owner,
3742                                         (unsigned long long)dback->offset,
3743                                         (unsigned long)dback->num_refs);
3744                         } else {
3745                                 tback = (struct tree_backref *)back;
3746                                 fprintf(stderr, "Backref %llu parent %llu"
3747                                         " root %llu not found in extent tree\n",
3748                                         (unsigned long long)rec->start,
3749                                         (unsigned long long)tback->parent,
3750                                         (unsigned long long)tback->root);
3751                         }
3752                 }
3753                 if (!back->is_data && !back->found_ref) {
3754                         err = 1;
3755                         if (!print_errs)
3756                                 goto out;
3757                         tback = (struct tree_backref *)back;
3758                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3759                                 (unsigned long long)rec->start,
3760                                 back->full_backref ? "parent" : "root",
3761                                 back->full_backref ?
3762                                 (unsigned long long)tback->parent :
3763                                 (unsigned long long)tback->root, back);
3764                 }
3765                 if (back->is_data) {
3766                         dback = (struct data_backref *)back;
3767                         if (dback->found_ref != dback->num_refs) {
3768                                 err = 1;
3769                                 if (!print_errs)
3770                                         goto out;
3771                                 fprintf(stderr, "Incorrect local backref count"
3772                                         " on %llu %s %llu owner %llu"
3773                                         " offset %llu found %u wanted %u back %p\n",
3774                                         (unsigned long long)rec->start,
3775                                         back->full_backref ?
3776                                         "parent" : "root",
3777                                         back->full_backref ?
3778                                         (unsigned long long)dback->parent:
3779                                         (unsigned long long)dback->root,
3780                                         (unsigned long long)dback->owner,
3781                                         (unsigned long long)dback->offset,
3782                                         dback->found_ref, dback->num_refs, back);
3783                         }
3784                         if (dback->disk_bytenr != rec->start) {
3785                                 err = 1;
3786                                 if (!print_errs)
3787                                         goto out;
3788                                 fprintf(stderr, "Backref disk bytenr does not"
3789                                         " match extent record, bytenr=%llu, "
3790                                         "ref bytenr=%llu\n",
3791                                         (unsigned long long)rec->start,
3792                                         (unsigned long long)dback->disk_bytenr);
3793                         }
3794
3795                         if (dback->bytes != rec->nr) {
3796                                 err = 1;
3797                                 if (!print_errs)
3798                                         goto out;
3799                                 fprintf(stderr, "Backref bytes do not match "
3800                                         "extent backref, bytenr=%llu, ref "
3801                                         "bytes=%llu, backref bytes=%llu\n",
3802                                         (unsigned long long)rec->start,
3803                                         (unsigned long long)rec->nr,
3804                                         (unsigned long long)dback->bytes);
3805                         }
3806                 }
3807                 if (!back->is_data) {
3808                         found += 1;
3809                 } else {
3810                         dback = (struct data_backref *)back;
3811                         found += dback->found_ref;
3812                 }
3813         }
3814         if (found != rec->refs) {
3815                 err = 1;
3816                 if (!print_errs)
3817                         goto out;
3818                 fprintf(stderr, "Incorrect global backref count "
3819                         "on %llu found %llu wanted %llu\n",
3820                         (unsigned long long)rec->start,
3821                         (unsigned long long)found,
3822                         (unsigned long long)rec->refs);
3823         }
3824 out:
3825         return err;
3826 }
3827
3828 static int free_all_extent_backrefs(struct extent_record *rec)
3829 {
3830         struct extent_backref *back;
3831         struct list_head *cur;
3832         while (!list_empty(&rec->backrefs)) {
3833                 cur = rec->backrefs.next;
3834                 back = list_entry(cur, struct extent_backref, list);
3835                 list_del(cur);
3836                 free(back);
3837         }
3838         return 0;
3839 }
3840
3841 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3842                                      struct cache_tree *extent_cache)
3843 {
3844         struct cache_extent *cache;
3845         struct extent_record *rec;
3846
3847         while (1) {
3848                 cache = first_cache_extent(extent_cache);
3849                 if (!cache)
3850                         break;
3851                 rec = container_of(cache, struct extent_record, cache);
3852                 remove_cache_extent(extent_cache, cache);
3853                 free_all_extent_backrefs(rec);
3854                 free(rec);
3855         }
3856 }
3857
3858 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3859                                  struct extent_record *rec)
3860 {
3861         if (rec->content_checked && rec->owner_ref_checked &&
3862             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3863             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3864             !rec->bad_full_backref && !rec->crossing_stripes &&
3865             !rec->wrong_chunk_type) {
3866                 remove_cache_extent(extent_cache, &rec->cache);
3867                 free_all_extent_backrefs(rec);
3868                 list_del_init(&rec->list);
3869                 free(rec);
3870         }
3871         return 0;
3872 }
3873
3874 static int check_owner_ref(struct btrfs_root *root,
3875                             struct extent_record *rec,
3876                             struct extent_buffer *buf)
3877 {
3878         struct extent_backref *node;
3879         struct tree_backref *back;
3880         struct btrfs_root *ref_root;
3881         struct btrfs_key key;
3882         struct btrfs_path path;
3883         struct extent_buffer *parent;
3884         int level;
3885         int found = 0;
3886         int ret;
3887
3888         list_for_each_entry(node, &rec->backrefs, list) {
3889                 if (node->is_data)
3890                         continue;
3891                 if (!node->found_ref)
3892                         continue;
3893                 if (node->full_backref)
3894                         continue;
3895                 back = (struct tree_backref *)node;
3896                 if (btrfs_header_owner(buf) == back->root)
3897                         return 0;
3898         }
3899         BUG_ON(rec->is_root);
3900
3901         /* try to find the block by search corresponding fs tree */
3902         key.objectid = btrfs_header_owner(buf);
3903         key.type = BTRFS_ROOT_ITEM_KEY;
3904         key.offset = (u64)-1;
3905
3906         ref_root = btrfs_read_fs_root(root->fs_info, &key);
3907         if (IS_ERR(ref_root))
3908                 return 1;
3909
3910         level = btrfs_header_level(buf);
3911         if (level == 0)
3912                 btrfs_item_key_to_cpu(buf, &key, 0);
3913         else
3914                 btrfs_node_key_to_cpu(buf, &key, 0);
3915
3916         btrfs_init_path(&path);
3917         path.lowest_level = level + 1;
3918         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3919         if (ret < 0)
3920                 return 0;
3921
3922         parent = path.nodes[level + 1];
3923         if (parent && buf->start == btrfs_node_blockptr(parent,
3924                                                         path.slots[level + 1]))
3925                 found = 1;
3926
3927         btrfs_release_path(&path);
3928         return found ? 0 : 1;
3929 }
3930
3931 static int is_extent_tree_record(struct extent_record *rec)
3932 {
3933         struct list_head *cur = rec->backrefs.next;
3934         struct extent_backref *node;
3935         struct tree_backref *back;
3936         int is_extent = 0;
3937
3938         while(cur != &rec->backrefs) {
3939                 node = list_entry(cur, struct extent_backref, list);
3940                 cur = cur->next;
3941                 if (node->is_data)
3942                         return 0;
3943                 back = (struct tree_backref *)node;
3944                 if (node->full_backref)
3945                         return 0;
3946                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3947                         is_extent = 1;
3948         }
3949         return is_extent;
3950 }
3951
3952
3953 static int record_bad_block_io(struct btrfs_fs_info *info,
3954                                struct cache_tree *extent_cache,
3955                                u64 start, u64 len)
3956 {
3957         struct extent_record *rec;
3958         struct cache_extent *cache;
3959         struct btrfs_key key;
3960
3961         cache = lookup_cache_extent(extent_cache, start, len);
3962         if (!cache)
3963                 return 0;
3964
3965         rec = container_of(cache, struct extent_record, cache);
3966         if (!is_extent_tree_record(rec))
3967                 return 0;
3968
3969         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3970         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
3971 }
3972
3973 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3974                        struct extent_buffer *buf, int slot)
3975 {
3976         if (btrfs_header_level(buf)) {
3977                 struct btrfs_key_ptr ptr1, ptr2;
3978
3979                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3980                                    sizeof(struct btrfs_key_ptr));
3981                 read_extent_buffer(buf, &ptr2,
3982                                    btrfs_node_key_ptr_offset(slot + 1),
3983                                    sizeof(struct btrfs_key_ptr));
3984                 write_extent_buffer(buf, &ptr1,
3985                                     btrfs_node_key_ptr_offset(slot + 1),
3986                                     sizeof(struct btrfs_key_ptr));
3987                 write_extent_buffer(buf, &ptr2,
3988                                     btrfs_node_key_ptr_offset(slot),
3989                                     sizeof(struct btrfs_key_ptr));
3990                 if (slot == 0) {
3991                         struct btrfs_disk_key key;
3992                         btrfs_node_key(buf, &key, 0);
3993                         btrfs_fixup_low_keys(root, path, &key,
3994                                              btrfs_header_level(buf) + 1);
3995                 }
3996         } else {
3997                 struct btrfs_item *item1, *item2;
3998                 struct btrfs_key k1, k2;
3999                 char *item1_data, *item2_data;
4000                 u32 item1_offset, item2_offset, item1_size, item2_size;
4001
4002                 item1 = btrfs_item_nr(slot);
4003                 item2 = btrfs_item_nr(slot + 1);
4004                 btrfs_item_key_to_cpu(buf, &k1, slot);
4005                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4006                 item1_offset = btrfs_item_offset(buf, item1);
4007                 item2_offset = btrfs_item_offset(buf, item2);
4008                 item1_size = btrfs_item_size(buf, item1);
4009                 item2_size = btrfs_item_size(buf, item2);
4010
4011                 item1_data = malloc(item1_size);
4012                 if (!item1_data)
4013                         return -ENOMEM;
4014                 item2_data = malloc(item2_size);
4015                 if (!item2_data) {
4016                         free(item1_data);
4017                         return -ENOMEM;
4018                 }
4019
4020                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4021                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4022
4023                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4024                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4025                 free(item1_data);
4026                 free(item2_data);
4027
4028                 btrfs_set_item_offset(buf, item1, item2_offset);
4029                 btrfs_set_item_offset(buf, item2, item1_offset);
4030                 btrfs_set_item_size(buf, item1, item2_size);
4031                 btrfs_set_item_size(buf, item2, item1_size);
4032
4033                 path->slots[0] = slot;
4034                 btrfs_set_item_key_unsafe(root, path, &k2);
4035                 path->slots[0] = slot + 1;
4036                 btrfs_set_item_key_unsafe(root, path, &k1);
4037         }
4038         return 0;
4039 }
4040
4041 static int fix_key_order(struct btrfs_trans_handle *trans,
4042                          struct btrfs_root *root,
4043                          struct btrfs_path *path)
4044 {
4045         struct extent_buffer *buf;
4046         struct btrfs_key k1, k2;
4047         int i;
4048         int level = path->lowest_level;
4049         int ret = -EIO;
4050
4051         buf = path->nodes[level];
4052         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4053                 if (level) {
4054                         btrfs_node_key_to_cpu(buf, &k1, i);
4055                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4056                 } else {
4057                         btrfs_item_key_to_cpu(buf, &k1, i);
4058                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4059                 }
4060                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4061                         continue;
4062                 ret = swap_values(root, path, buf, i);
4063                 if (ret)
4064                         break;
4065                 btrfs_mark_buffer_dirty(buf);
4066                 i = 0;
4067         }
4068         return ret;
4069 }
4070
4071 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4072                              struct btrfs_root *root,
4073                              struct btrfs_path *path,
4074                              struct extent_buffer *buf, int slot)
4075 {
4076         struct btrfs_key key;
4077         int nritems = btrfs_header_nritems(buf);
4078
4079         btrfs_item_key_to_cpu(buf, &key, slot);
4080
4081         /* These are all the keys we can deal with missing. */
4082         if (key.type != BTRFS_DIR_INDEX_KEY &&
4083             key.type != BTRFS_EXTENT_ITEM_KEY &&
4084             key.type != BTRFS_METADATA_ITEM_KEY &&
4085             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4086             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4087                 return -1;
4088
4089         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4090                (unsigned long long)key.objectid, key.type,
4091                (unsigned long long)key.offset, slot, buf->start);
4092         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4093                               btrfs_item_nr_offset(slot + 1),
4094                               sizeof(struct btrfs_item) *
4095                               (nritems - slot - 1));
4096         btrfs_set_header_nritems(buf, nritems - 1);
4097         if (slot == 0) {
4098                 struct btrfs_disk_key disk_key;
4099
4100                 btrfs_item_key(buf, &disk_key, 0);
4101                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4102         }
4103         btrfs_mark_buffer_dirty(buf);
4104         return 0;
4105 }
4106
4107 static int fix_item_offset(struct btrfs_trans_handle *trans,
4108                            struct btrfs_root *root,
4109                            struct btrfs_path *path)
4110 {
4111         struct extent_buffer *buf;
4112         int i;
4113         int ret = 0;
4114
4115         /* We should only get this for leaves */
4116         BUG_ON(path->lowest_level);
4117         buf = path->nodes[0];
4118 again:
4119         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4120                 unsigned int shift = 0, offset;
4121
4122                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4123                     BTRFS_LEAF_DATA_SIZE(root)) {
4124                         if (btrfs_item_end_nr(buf, i) >
4125                             BTRFS_LEAF_DATA_SIZE(root)) {
4126                                 ret = delete_bogus_item(trans, root, path,
4127                                                         buf, i);
4128                                 if (!ret)
4129                                         goto again;
4130                                 fprintf(stderr, "item is off the end of the "
4131                                         "leaf, can't fix\n");
4132                                 ret = -EIO;
4133                                 break;
4134                         }
4135                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4136                                 btrfs_item_end_nr(buf, i);
4137                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4138                            btrfs_item_offset_nr(buf, i - 1)) {
4139                         if (btrfs_item_end_nr(buf, i) >
4140                             btrfs_item_offset_nr(buf, i - 1)) {
4141                                 ret = delete_bogus_item(trans, root, path,
4142                                                         buf, i);
4143                                 if (!ret)
4144                                         goto again;
4145                                 fprintf(stderr, "items overlap, can't fix\n");
4146                                 ret = -EIO;
4147                                 break;
4148                         }
4149                         shift = btrfs_item_offset_nr(buf, i - 1) -
4150                                 btrfs_item_end_nr(buf, i);
4151                 }
4152                 if (!shift)
4153                         continue;
4154
4155                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4156                        i, shift, (unsigned long long)buf->start);
4157                 offset = btrfs_item_offset_nr(buf, i);
4158                 memmove_extent_buffer(buf,
4159                                       btrfs_leaf_data(buf) + offset + shift,
4160                                       btrfs_leaf_data(buf) + offset,
4161                                       btrfs_item_size_nr(buf, i));
4162                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4163                                       offset + shift);
4164                 btrfs_mark_buffer_dirty(buf);
4165         }
4166
4167         /*
4168          * We may have moved things, in which case we want to exit so we don't
4169          * write those changes out.  Once we have proper abort functionality in
4170          * progs this can be changed to something nicer.
4171          */
4172         BUG_ON(ret);
4173         return ret;
4174 }
4175
4176 /*
4177  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4178  * then just return -EIO.
4179  */
4180 static int try_to_fix_bad_block(struct btrfs_root *root,
4181                                 struct extent_buffer *buf,
4182                                 enum btrfs_tree_block_status status)
4183 {
4184         struct btrfs_trans_handle *trans;
4185         struct ulist *roots;
4186         struct ulist_node *node;
4187         struct btrfs_root *search_root;
4188         struct btrfs_path *path;
4189         struct ulist_iterator iter;
4190         struct btrfs_key root_key, key;
4191         int ret;
4192
4193         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4194             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4195                 return -EIO;
4196
4197         path = btrfs_alloc_path();
4198         if (!path)
4199                 return -EIO;
4200
4201         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4202                                    0, &roots);
4203         if (ret) {
4204                 btrfs_free_path(path);
4205                 return -EIO;
4206         }
4207
4208         ULIST_ITER_INIT(&iter);
4209         while ((node = ulist_next(roots, &iter))) {
4210                 root_key.objectid = node->val;
4211                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4212                 root_key.offset = (u64)-1;
4213
4214                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4215                 if (IS_ERR(root)) {
4216                         ret = -EIO;
4217                         break;
4218                 }
4219
4220
4221                 trans = btrfs_start_transaction(search_root, 0);
4222                 if (IS_ERR(trans)) {
4223                         ret = PTR_ERR(trans);
4224                         break;
4225                 }
4226
4227                 path->lowest_level = btrfs_header_level(buf);
4228                 path->skip_check_block = 1;
4229                 if (path->lowest_level)
4230                         btrfs_node_key_to_cpu(buf, &key, 0);
4231                 else
4232                         btrfs_item_key_to_cpu(buf, &key, 0);
4233                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4234                 if (ret) {
4235                         ret = -EIO;
4236                         btrfs_commit_transaction(trans, search_root);
4237                         break;
4238                 }
4239                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4240                         ret = fix_key_order(trans, search_root, path);
4241                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4242                         ret = fix_item_offset(trans, search_root, path);
4243                 if (ret) {
4244                         btrfs_commit_transaction(trans, search_root);
4245                         break;
4246                 }
4247                 btrfs_release_path(path);
4248                 btrfs_commit_transaction(trans, search_root);
4249         }
4250         ulist_free(roots);
4251         btrfs_free_path(path);
4252         return ret;
4253 }
4254
4255 static int check_block(struct btrfs_root *root,
4256                        struct cache_tree *extent_cache,
4257                        struct extent_buffer *buf, u64 flags)
4258 {
4259         struct extent_record *rec;
4260         struct cache_extent *cache;
4261         struct btrfs_key key;
4262         enum btrfs_tree_block_status status;
4263         int ret = 0;
4264         int level;
4265
4266         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4267         if (!cache)
4268                 return 1;
4269         rec = container_of(cache, struct extent_record, cache);
4270         rec->generation = btrfs_header_generation(buf);
4271
4272         level = btrfs_header_level(buf);
4273         if (btrfs_header_nritems(buf) > 0) {
4274
4275                 if (level == 0)
4276                         btrfs_item_key_to_cpu(buf, &key, 0);
4277                 else
4278                         btrfs_node_key_to_cpu(buf, &key, 0);
4279
4280                 rec->info_objectid = key.objectid;
4281         }
4282         rec->info_level = level;
4283
4284         if (btrfs_is_leaf(buf))
4285                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4286         else
4287                 status = btrfs_check_node(root, &rec->parent_key, buf);
4288
4289         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4290                 if (repair)
4291                         status = try_to_fix_bad_block(root, buf, status);
4292                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4293                         ret = -EIO;
4294                         fprintf(stderr, "bad block %llu\n",
4295                                 (unsigned long long)buf->start);
4296                 } else {
4297                         /*
4298                          * Signal to callers we need to start the scan over
4299                          * again since we'll have cow'ed blocks.
4300                          */
4301                         ret = -EAGAIN;
4302                 }
4303         } else {
4304                 rec->content_checked = 1;
4305                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4306                         rec->owner_ref_checked = 1;
4307                 else {
4308                         ret = check_owner_ref(root, rec, buf);
4309                         if (!ret)
4310                                 rec->owner_ref_checked = 1;
4311                 }
4312         }
4313         if (!ret)
4314                 maybe_free_extent_rec(extent_cache, rec);
4315         return ret;
4316 }
4317
4318 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4319                                                 u64 parent, u64 root)
4320 {
4321         struct list_head *cur = rec->backrefs.next;
4322         struct extent_backref *node;
4323         struct tree_backref *back;
4324
4325         while(cur != &rec->backrefs) {
4326                 node = list_entry(cur, struct extent_backref, list);
4327                 cur = cur->next;
4328                 if (node->is_data)
4329                         continue;
4330                 back = (struct tree_backref *)node;
4331                 if (parent > 0) {
4332                         if (!node->full_backref)
4333                                 continue;
4334                         if (parent == back->parent)
4335                                 return back;
4336                 } else {
4337                         if (node->full_backref)
4338                                 continue;
4339                         if (back->root == root)
4340                                 return back;
4341                 }
4342         }
4343         return NULL;
4344 }
4345
4346 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4347                                                 u64 parent, u64 root)
4348 {
4349         struct tree_backref *ref = malloc(sizeof(*ref));
4350         memset(&ref->node, 0, sizeof(ref->node));
4351         if (parent > 0) {
4352                 ref->parent = parent;
4353                 ref->node.full_backref = 1;
4354         } else {
4355                 ref->root = root;
4356                 ref->node.full_backref = 0;
4357         }
4358         list_add_tail(&ref->node.list, &rec->backrefs);
4359
4360         return ref;
4361 }
4362
4363 static struct data_backref *find_data_backref(struct extent_record *rec,
4364                                                 u64 parent, u64 root,
4365                                                 u64 owner, u64 offset,
4366                                                 int found_ref,
4367                                                 u64 disk_bytenr, u64 bytes)
4368 {
4369         struct list_head *cur = rec->backrefs.next;
4370         struct extent_backref *node;
4371         struct data_backref *back;
4372
4373         while(cur != &rec->backrefs) {
4374                 node = list_entry(cur, struct extent_backref, list);
4375                 cur = cur->next;
4376                 if (!node->is_data)
4377                         continue;
4378                 back = (struct data_backref *)node;
4379                 if (parent > 0) {
4380                         if (!node->full_backref)
4381                                 continue;
4382                         if (parent == back->parent)
4383                                 return back;
4384                 } else {
4385                         if (node->full_backref)
4386                                 continue;
4387                         if (back->root == root && back->owner == owner &&
4388                             back->offset == offset) {
4389                                 if (found_ref && node->found_ref &&
4390                                     (back->bytes != bytes ||
4391                                     back->disk_bytenr != disk_bytenr))
4392                                         continue;
4393                                 return back;
4394                         }
4395                 }
4396         }
4397         return NULL;
4398 }
4399
4400 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4401                                                 u64 parent, u64 root,
4402                                                 u64 owner, u64 offset,
4403                                                 u64 max_size)
4404 {
4405         struct data_backref *ref = malloc(sizeof(*ref));
4406         memset(&ref->node, 0, sizeof(ref->node));
4407         ref->node.is_data = 1;
4408
4409         if (parent > 0) {
4410                 ref->parent = parent;
4411                 ref->owner = 0;
4412                 ref->offset = 0;
4413                 ref->node.full_backref = 1;
4414         } else {
4415                 ref->root = root;
4416                 ref->owner = owner;
4417                 ref->offset = offset;
4418                 ref->node.full_backref = 0;
4419         }
4420         ref->bytes = max_size;
4421         ref->found_ref = 0;
4422         ref->num_refs = 0;
4423         list_add_tail(&ref->node.list, &rec->backrefs);
4424         if (max_size > rec->max_size)
4425                 rec->max_size = max_size;
4426         return ref;
4427 }
4428
4429 /* Check if the type of extent matches with its chunk */
4430 static void check_extent_type(struct extent_record *rec)
4431 {
4432         struct btrfs_block_group_cache *bg_cache;
4433
4434         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4435         if (!bg_cache)
4436                 return;
4437
4438         /* data extent, check chunk directly*/
4439         if (!rec->metadata) {
4440                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4441                         rec->wrong_chunk_type = 1;
4442                 return;
4443         }
4444
4445         /* metadata extent, check the obvious case first */
4446         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4447                                  BTRFS_BLOCK_GROUP_METADATA))) {
4448                 rec->wrong_chunk_type = 1;
4449                 return;
4450         }
4451
4452         /*
4453          * Check SYSTEM extent, as it's also marked as metadata, we can only
4454          * make sure it's a SYSTEM extent by its backref
4455          */
4456         if (!list_empty(&rec->backrefs)) {
4457                 struct extent_backref *node;
4458                 struct tree_backref *tback;
4459                 u64 bg_type;
4460
4461                 node = list_entry(rec->backrefs.next, struct extent_backref,
4462                                   list);
4463                 if (node->is_data) {
4464                         /* tree block shouldn't have data backref */
4465                         rec->wrong_chunk_type = 1;
4466                         return;
4467                 }
4468                 tback = container_of(node, struct tree_backref, node);
4469
4470                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4471                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4472                 else
4473                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4474                 if (!(bg_cache->flags & bg_type))
4475                         rec->wrong_chunk_type = 1;
4476         }
4477 }
4478
4479 static int add_extent_rec(struct cache_tree *extent_cache,
4480                           struct btrfs_key *parent_key, u64 parent_gen,
4481                           u64 start, u64 nr, u64 extent_item_refs,
4482                           int is_root, int inc_ref, int set_checked,
4483                           int metadata, int extent_rec, u64 max_size)
4484 {
4485         struct extent_record *rec;
4486         struct cache_extent *cache;
4487         int ret = 0;
4488         int dup = 0;
4489
4490         cache = lookup_cache_extent(extent_cache, start, nr);
4491         if (cache) {
4492                 rec = container_of(cache, struct extent_record, cache);
4493                 if (inc_ref)
4494                         rec->refs++;
4495                 if (rec->nr == 1)
4496                         rec->nr = max(nr, max_size);
4497
4498                 /*
4499                  * We need to make sure to reset nr to whatever the extent
4500                  * record says was the real size, this way we can compare it to
4501                  * the backrefs.
4502                  */
4503                 if (extent_rec) {
4504                         if (start != rec->start || rec->found_rec) {
4505                                 struct extent_record *tmp;
4506
4507                                 dup = 1;
4508                                 if (list_empty(&rec->list))
4509                                         list_add_tail(&rec->list,
4510                                                       &duplicate_extents);
4511
4512                                 /*
4513                                  * We have to do this song and dance in case we
4514                                  * find an extent record that falls inside of
4515                                  * our current extent record but does not have
4516                                  * the same objectid.
4517                                  */
4518                                 tmp = malloc(sizeof(*tmp));
4519                                 if (!tmp)
4520                                         return -ENOMEM;
4521                                 tmp->start = start;
4522                                 tmp->max_size = max_size;
4523                                 tmp->nr = nr;
4524                                 tmp->found_rec = 1;
4525                                 tmp->metadata = metadata;
4526                                 tmp->extent_item_refs = extent_item_refs;
4527                                 INIT_LIST_HEAD(&tmp->list);
4528                                 list_add_tail(&tmp->list, &rec->dups);
4529                                 rec->num_duplicates++;
4530                         } else {
4531                                 rec->nr = nr;
4532                                 rec->found_rec = 1;
4533                         }
4534                 }
4535
4536                 if (extent_item_refs && !dup) {
4537                         if (rec->extent_item_refs) {
4538                                 fprintf(stderr, "block %llu rec "
4539                                         "extent_item_refs %llu, passed %llu\n",
4540                                         (unsigned long long)start,
4541                                         (unsigned long long)
4542                                                         rec->extent_item_refs,
4543                                         (unsigned long long)extent_item_refs);
4544                         }
4545                         rec->extent_item_refs = extent_item_refs;
4546                 }
4547                 if (is_root)
4548                         rec->is_root = 1;
4549                 if (set_checked) {
4550                         rec->content_checked = 1;
4551                         rec->owner_ref_checked = 1;
4552                 }
4553
4554                 if (parent_key)
4555                         btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4556                 if (parent_gen)
4557                         rec->parent_generation = parent_gen;
4558
4559                 if (rec->max_size < max_size)
4560                         rec->max_size = max_size;
4561
4562                 /*
4563                  * A metadata extent can't cross stripe_len boundary, otherwise
4564                  * kernel scrub won't be able to handle it.
4565                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4566                  * it.
4567                  */
4568                 if (metadata && check_crossing_stripes(rec->start,
4569                                                        rec->max_size))
4570                                 rec->crossing_stripes = 1;
4571                 check_extent_type(rec);
4572                 maybe_free_extent_rec(extent_cache, rec);
4573                 return ret;
4574         }
4575         rec = malloc(sizeof(*rec));
4576         rec->start = start;
4577         rec->max_size = max_size;
4578         rec->nr = max(nr, max_size);
4579         rec->found_rec = !!extent_rec;
4580         rec->content_checked = 0;
4581         rec->owner_ref_checked = 0;
4582         rec->num_duplicates = 0;
4583         rec->metadata = metadata;
4584         rec->flag_block_full_backref = -1;
4585         rec->bad_full_backref = 0;
4586         rec->crossing_stripes = 0;
4587         rec->wrong_chunk_type = 0;
4588         INIT_LIST_HEAD(&rec->backrefs);
4589         INIT_LIST_HEAD(&rec->dups);
4590         INIT_LIST_HEAD(&rec->list);
4591
4592         if (is_root)
4593                 rec->is_root = 1;
4594         else
4595                 rec->is_root = 0;
4596
4597         if (inc_ref)
4598                 rec->refs = 1;
4599         else
4600                 rec->refs = 0;
4601
4602         if (extent_item_refs)
4603                 rec->extent_item_refs = extent_item_refs;
4604         else
4605                 rec->extent_item_refs = 0;
4606
4607         if (parent_key)
4608                 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4609         else
4610                 memset(&rec->parent_key, 0, sizeof(*parent_key));
4611
4612         if (parent_gen)
4613                 rec->parent_generation = parent_gen;
4614         else
4615                 rec->parent_generation = 0;
4616
4617         rec->cache.start = start;
4618         rec->cache.size = nr;
4619         ret = insert_cache_extent(extent_cache, &rec->cache);
4620         BUG_ON(ret);
4621         bytes_used += nr;
4622         if (set_checked) {
4623                 rec->content_checked = 1;
4624                 rec->owner_ref_checked = 1;
4625         }
4626
4627         if (metadata)
4628                 if (check_crossing_stripes(rec->start, rec->max_size))
4629                         rec->crossing_stripes = 1;
4630         check_extent_type(rec);
4631         return ret;
4632 }
4633
4634 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4635                             u64 parent, u64 root, int found_ref)
4636 {
4637         struct extent_record *rec;
4638         struct tree_backref *back;
4639         struct cache_extent *cache;
4640
4641         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4642         if (!cache) {
4643                 add_extent_rec(extent_cache, NULL, 0, bytenr,
4644                                1, 0, 0, 0, 0, 1, 0, 0);
4645                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4646                 if (!cache)
4647                         abort();
4648         }
4649
4650         rec = container_of(cache, struct extent_record, cache);
4651         if (rec->start != bytenr) {
4652                 abort();
4653         }
4654
4655         back = find_tree_backref(rec, parent, root);
4656         if (!back)
4657                 back = alloc_tree_backref(rec, parent, root);
4658
4659         if (found_ref) {
4660                 if (back->node.found_ref) {
4661                         fprintf(stderr, "Extent back ref already exists "
4662                                 "for %llu parent %llu root %llu \n",
4663                                 (unsigned long long)bytenr,
4664                                 (unsigned long long)parent,
4665                                 (unsigned long long)root);
4666                 }
4667                 back->node.found_ref = 1;
4668         } else {
4669                 if (back->node.found_extent_tree) {
4670                         fprintf(stderr, "Extent back ref already exists "
4671                                 "for %llu parent %llu root %llu \n",
4672                                 (unsigned long long)bytenr,
4673                                 (unsigned long long)parent,
4674                                 (unsigned long long)root);
4675                 }
4676                 back->node.found_extent_tree = 1;
4677         }
4678         check_extent_type(rec);
4679         maybe_free_extent_rec(extent_cache, rec);
4680         return 0;
4681 }
4682
4683 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4684                             u64 parent, u64 root, u64 owner, u64 offset,
4685                             u32 num_refs, int found_ref, u64 max_size)
4686 {
4687         struct extent_record *rec;
4688         struct data_backref *back;
4689         struct cache_extent *cache;
4690
4691         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4692         if (!cache) {
4693                 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
4694                                0, 0, max_size);
4695                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4696                 if (!cache)
4697                         abort();
4698         }
4699
4700         rec = container_of(cache, struct extent_record, cache);
4701         if (rec->max_size < max_size)
4702                 rec->max_size = max_size;
4703
4704         /*
4705          * If found_ref is set then max_size is the real size and must match the
4706          * existing refs.  So if we have already found a ref then we need to
4707          * make sure that this ref matches the existing one, otherwise we need
4708          * to add a new backref so we can notice that the backrefs don't match
4709          * and we need to figure out who is telling the truth.  This is to
4710          * account for that awful fsync bug I introduced where we'd end up with
4711          * a btrfs_file_extent_item that would have its length include multiple
4712          * prealloc extents or point inside of a prealloc extent.
4713          */
4714         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4715                                  bytenr, max_size);
4716         if (!back)
4717                 back = alloc_data_backref(rec, parent, root, owner, offset,
4718                                           max_size);
4719
4720         if (found_ref) {
4721                 BUG_ON(num_refs != 1);
4722                 if (back->node.found_ref)
4723                         BUG_ON(back->bytes != max_size);
4724                 back->node.found_ref = 1;
4725                 back->found_ref += 1;
4726                 back->bytes = max_size;
4727                 back->disk_bytenr = bytenr;
4728                 rec->refs += 1;
4729                 rec->content_checked = 1;
4730                 rec->owner_ref_checked = 1;
4731         } else {
4732                 if (back->node.found_extent_tree) {
4733                         fprintf(stderr, "Extent back ref already exists "
4734                                 "for %llu parent %llu root %llu "
4735                                 "owner %llu offset %llu num_refs %lu\n",
4736                                 (unsigned long long)bytenr,
4737                                 (unsigned long long)parent,
4738                                 (unsigned long long)root,
4739                                 (unsigned long long)owner,
4740                                 (unsigned long long)offset,
4741                                 (unsigned long)num_refs);
4742                 }
4743                 back->num_refs = num_refs;
4744                 back->node.found_extent_tree = 1;
4745         }
4746         maybe_free_extent_rec(extent_cache, rec);
4747         return 0;
4748 }
4749
4750 static int add_pending(struct cache_tree *pending,
4751                        struct cache_tree *seen, u64 bytenr, u32 size)
4752 {
4753         int ret;
4754         ret = add_cache_extent(seen, bytenr, size);
4755         if (ret)
4756                 return ret;
4757         add_cache_extent(pending, bytenr, size);
4758         return 0;
4759 }
4760
4761 static int pick_next_pending(struct cache_tree *pending,
4762                         struct cache_tree *reada,
4763                         struct cache_tree *nodes,
4764                         u64 last, struct block_info *bits, int bits_nr,
4765                         int *reada_bits)
4766 {
4767         unsigned long node_start = last;
4768         struct cache_extent *cache;
4769         int ret;
4770
4771         cache = search_cache_extent(reada, 0);
4772         if (cache) {
4773                 bits[0].start = cache->start;
4774                 bits[0].size = cache->size;
4775                 *reada_bits = 1;
4776                 return 1;
4777         }
4778         *reada_bits = 0;
4779         if (node_start > 32768)
4780                 node_start -= 32768;
4781
4782         cache = search_cache_extent(nodes, node_start);
4783         if (!cache)
4784                 cache = search_cache_extent(nodes, 0);
4785
4786         if (!cache) {
4787                  cache = search_cache_extent(pending, 0);
4788                  if (!cache)
4789                          return 0;
4790                  ret = 0;
4791                  do {
4792                          bits[ret].start = cache->start;
4793                          bits[ret].size = cache->size;
4794                          cache = next_cache_extent(cache);
4795                          ret++;
4796                  } while (cache && ret < bits_nr);
4797                  return ret;
4798         }
4799
4800         ret = 0;
4801         do {
4802                 bits[ret].start = cache->start;
4803                 bits[ret].size = cache->size;
4804                 cache = next_cache_extent(cache);
4805                 ret++;
4806         } while (cache && ret < bits_nr);
4807
4808         if (bits_nr - ret > 8) {
4809                 u64 lookup = bits[0].start + bits[0].size;
4810                 struct cache_extent *next;
4811                 next = search_cache_extent(pending, lookup);
4812                 while(next) {
4813                         if (next->start - lookup > 32768)
4814                                 break;
4815                         bits[ret].start = next->start;
4816                         bits[ret].size = next->size;
4817                         lookup = next->start + next->size;
4818                         ret++;
4819                         if (ret == bits_nr)
4820                                 break;
4821                         next = next_cache_extent(next);
4822                         if (!next)
4823                                 break;
4824                 }
4825         }
4826         return ret;
4827 }
4828
4829 static void free_chunk_record(struct cache_extent *cache)
4830 {
4831         struct chunk_record *rec;
4832
4833         rec = container_of(cache, struct chunk_record, cache);
4834         list_del_init(&rec->list);
4835         list_del_init(&rec->dextents);
4836         free(rec);
4837 }
4838
4839 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4840 {
4841         cache_tree_free_extents(chunk_cache, free_chunk_record);
4842 }
4843
4844 static void free_device_record(struct rb_node *node)
4845 {
4846         struct device_record *rec;
4847
4848         rec = container_of(node, struct device_record, node);
4849         free(rec);
4850 }
4851
4852 FREE_RB_BASED_TREE(device_cache, free_device_record);
4853
4854 int insert_block_group_record(struct block_group_tree *tree,
4855                               struct block_group_record *bg_rec)
4856 {
4857         int ret;
4858
4859         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4860         if (ret)
4861                 return ret;
4862
4863         list_add_tail(&bg_rec->list, &tree->block_groups);
4864         return 0;
4865 }
4866
4867 static void free_block_group_record(struct cache_extent *cache)
4868 {
4869         struct block_group_record *rec;
4870
4871         rec = container_of(cache, struct block_group_record, cache);
4872         list_del_init(&rec->list);
4873         free(rec);
4874 }
4875
4876 void free_block_group_tree(struct block_group_tree *tree)
4877 {
4878         cache_tree_free_extents(&tree->tree, free_block_group_record);
4879 }
4880
4881 int insert_device_extent_record(struct device_extent_tree *tree,
4882                                 struct device_extent_record *de_rec)
4883 {
4884         int ret;
4885
4886         /*
4887          * Device extent is a bit different from the other extents, because
4888          * the extents which belong to the different devices may have the
4889          * same start and size, so we need use the special extent cache
4890          * search/insert functions.
4891          */
4892         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4893         if (ret)
4894                 return ret;
4895
4896         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4897         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4898         return 0;
4899 }
4900
4901 static void free_device_extent_record(struct cache_extent *cache)
4902 {
4903         struct device_extent_record *rec;
4904
4905         rec = container_of(cache, struct device_extent_record, cache);
4906         if (!list_empty(&rec->chunk_list))
4907                 list_del_init(&rec->chunk_list);
4908         if (!list_empty(&rec->device_list))
4909                 list_del_init(&rec->device_list);
4910         free(rec);
4911 }
4912
4913 void free_device_extent_tree(struct device_extent_tree *tree)
4914 {
4915         cache_tree_free_extents(&tree->tree, free_device_extent_record);
4916 }
4917
4918 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4919 static int process_extent_ref_v0(struct cache_tree *extent_cache,
4920                                  struct extent_buffer *leaf, int slot)
4921 {
4922         struct btrfs_extent_ref_v0 *ref0;
4923         struct btrfs_key key;
4924
4925         btrfs_item_key_to_cpu(leaf, &key, slot);
4926         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
4927         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
4928                 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
4929         } else {
4930                 add_data_backref(extent_cache, key.objectid, key.offset, 0,
4931                                  0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
4932         }
4933         return 0;
4934 }
4935 #endif
4936
4937 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4938                                             struct btrfs_key *key,
4939                                             int slot)
4940 {
4941         struct btrfs_chunk *ptr;
4942         struct chunk_record *rec;
4943         int num_stripes, i;
4944
4945         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4946         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4947
4948         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4949         if (!rec) {
4950                 fprintf(stderr, "memory allocation failed\n");
4951                 exit(-1);
4952         }
4953
4954         INIT_LIST_HEAD(&rec->list);
4955         INIT_LIST_HEAD(&rec->dextents);
4956         rec->bg_rec = NULL;
4957
4958         rec->cache.start = key->offset;
4959         rec->cache.size = btrfs_chunk_length(leaf, ptr);
4960
4961         rec->generation = btrfs_header_generation(leaf);
4962
4963         rec->objectid = key->objectid;
4964         rec->type = key->type;
4965         rec->offset = key->offset;
4966
4967         rec->length = rec->cache.size;
4968         rec->owner = btrfs_chunk_owner(leaf, ptr);
4969         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4970         rec->type_flags = btrfs_chunk_type(leaf, ptr);
4971         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4972         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4973         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4974         rec->num_stripes = num_stripes;
4975         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4976
4977         for (i = 0; i < rec->num_stripes; ++i) {
4978                 rec->stripes[i].devid =
4979                         btrfs_stripe_devid_nr(leaf, ptr, i);
4980                 rec->stripes[i].offset =
4981                         btrfs_stripe_offset_nr(leaf, ptr, i);
4982                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4983                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
4984                                 BTRFS_UUID_SIZE);
4985         }
4986
4987         return rec;
4988 }
4989
4990 static int process_chunk_item(struct cache_tree *chunk_cache,
4991                               struct btrfs_key *key, struct extent_buffer *eb,
4992                               int slot)
4993 {
4994         struct chunk_record *rec;
4995         int ret = 0;
4996
4997         rec = btrfs_new_chunk_record(eb, key, slot);
4998         ret = insert_cache_extent(chunk_cache, &rec->cache);
4999         if (ret) {
5000                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5001                         rec->offset, rec->length);
5002                 free(rec);
5003         }
5004
5005         return ret;
5006 }
5007
5008 static int process_device_item(struct rb_root *dev_cache,
5009                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5010 {
5011         struct btrfs_dev_item *ptr;
5012         struct device_record *rec;
5013         int ret = 0;
5014
5015         ptr = btrfs_item_ptr(eb,
5016                 slot, struct btrfs_dev_item);
5017
5018         rec = malloc(sizeof(*rec));
5019         if (!rec) {
5020                 fprintf(stderr, "memory allocation failed\n");
5021                 return -ENOMEM;
5022         }
5023
5024         rec->devid = key->offset;
5025         rec->generation = btrfs_header_generation(eb);
5026
5027         rec->objectid = key->objectid;
5028         rec->type = key->type;
5029         rec->offset = key->offset;
5030
5031         rec->devid = btrfs_device_id(eb, ptr);
5032         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5033         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5034
5035         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5036         if (ret) {
5037                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5038                 free(rec);
5039         }
5040
5041         return ret;
5042 }
5043
5044 struct block_group_record *
5045 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5046                              int slot)
5047 {
5048         struct btrfs_block_group_item *ptr;
5049         struct block_group_record *rec;
5050
5051         rec = calloc(1, sizeof(*rec));
5052         if (!rec) {
5053                 fprintf(stderr, "memory allocation failed\n");
5054                 exit(-1);
5055         }
5056
5057         rec->cache.start = key->objectid;
5058         rec->cache.size = key->offset;
5059
5060         rec->generation = btrfs_header_generation(leaf);
5061
5062         rec->objectid = key->objectid;
5063         rec->type = key->type;
5064         rec->offset = key->offset;
5065
5066         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5067         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5068
5069         INIT_LIST_HEAD(&rec->list);
5070
5071         return rec;
5072 }
5073
5074 static int process_block_group_item(struct block_group_tree *block_group_cache,
5075                                     struct btrfs_key *key,
5076                                     struct extent_buffer *eb, int slot)
5077 {
5078         struct block_group_record *rec;
5079         int ret = 0;
5080
5081         rec = btrfs_new_block_group_record(eb, key, slot);
5082         ret = insert_block_group_record(block_group_cache, rec);
5083         if (ret) {
5084                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5085                         rec->objectid, rec->offset);
5086                 free(rec);
5087         }
5088
5089         return ret;
5090 }
5091
5092 struct device_extent_record *
5093 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5094                                struct btrfs_key *key, int slot)
5095 {
5096         struct device_extent_record *rec;
5097         struct btrfs_dev_extent *ptr;
5098
5099         rec = calloc(1, sizeof(*rec));
5100         if (!rec) {
5101                 fprintf(stderr, "memory allocation failed\n");
5102                 exit(-1);
5103         }
5104
5105         rec->cache.objectid = key->objectid;
5106         rec->cache.start = key->offset;
5107
5108         rec->generation = btrfs_header_generation(leaf);
5109
5110         rec->objectid = key->objectid;
5111         rec->type = key->type;
5112         rec->offset = key->offset;
5113
5114         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5115         rec->chunk_objecteid =
5116                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5117         rec->chunk_offset =
5118                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5119         rec->length = btrfs_dev_extent_length(leaf, ptr);
5120         rec->cache.size = rec->length;
5121
5122         INIT_LIST_HEAD(&rec->chunk_list);
5123         INIT_LIST_HEAD(&rec->device_list);
5124
5125         return rec;
5126 }
5127
5128 static int
5129 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5130                            struct btrfs_key *key, struct extent_buffer *eb,
5131                            int slot)
5132 {
5133         struct device_extent_record *rec;
5134         int ret;
5135
5136         rec = btrfs_new_device_extent_record(eb, key, slot);
5137         ret = insert_device_extent_record(dev_extent_cache, rec);
5138         if (ret) {
5139                 fprintf(stderr,
5140                         "Device extent[%llu, %llu, %llu] existed.\n",
5141                         rec->objectid, rec->offset, rec->length);
5142                 free(rec);
5143         }
5144
5145         return ret;
5146 }
5147
5148 static int process_extent_item(struct btrfs_root *root,
5149                                struct cache_tree *extent_cache,
5150                                struct extent_buffer *eb, int slot)
5151 {
5152         struct btrfs_extent_item *ei;
5153         struct btrfs_extent_inline_ref *iref;
5154         struct btrfs_extent_data_ref *dref;
5155         struct btrfs_shared_data_ref *sref;
5156         struct btrfs_key key;
5157         unsigned long end;
5158         unsigned long ptr;
5159         int type;
5160         u32 item_size = btrfs_item_size_nr(eb, slot);
5161         u64 refs = 0;
5162         u64 offset;
5163         u64 num_bytes;
5164         int metadata = 0;
5165
5166         btrfs_item_key_to_cpu(eb, &key, slot);
5167
5168         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5169                 metadata = 1;
5170                 num_bytes = root->leafsize;
5171         } else {
5172                 num_bytes = key.offset;
5173         }
5174
5175         if (item_size < sizeof(*ei)) {
5176 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5177                 struct btrfs_extent_item_v0 *ei0;
5178                 BUG_ON(item_size != sizeof(*ei0));
5179                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5180                 refs = btrfs_extent_refs_v0(eb, ei0);
5181 #else
5182                 BUG();
5183 #endif
5184                 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5185                                       num_bytes, refs, 0, 0, 0, metadata, 1,
5186                                       num_bytes);
5187         }
5188
5189         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5190         refs = btrfs_extent_refs(eb, ei);
5191         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5192                 metadata = 1;
5193         else
5194                 metadata = 0;
5195
5196         add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5197                        refs, 0, 0, 0, metadata, 1, num_bytes);
5198
5199         ptr = (unsigned long)(ei + 1);
5200         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5201             key.type == BTRFS_EXTENT_ITEM_KEY)
5202                 ptr += sizeof(struct btrfs_tree_block_info);
5203
5204         end = (unsigned long)ei + item_size;
5205         while (ptr < end) {
5206                 iref = (struct btrfs_extent_inline_ref *)ptr;
5207                 type = btrfs_extent_inline_ref_type(eb, iref);
5208                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5209                 switch (type) {
5210                 case BTRFS_TREE_BLOCK_REF_KEY:
5211                         add_tree_backref(extent_cache, key.objectid,
5212                                          0, offset, 0);
5213                         break;
5214                 case BTRFS_SHARED_BLOCK_REF_KEY:
5215                         add_tree_backref(extent_cache, key.objectid,
5216                                          offset, 0, 0);
5217                         break;
5218                 case BTRFS_EXTENT_DATA_REF_KEY:
5219                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5220                         add_data_backref(extent_cache, key.objectid, 0,
5221                                         btrfs_extent_data_ref_root(eb, dref),
5222                                         btrfs_extent_data_ref_objectid(eb,
5223                                                                        dref),
5224                                         btrfs_extent_data_ref_offset(eb, dref),
5225                                         btrfs_extent_data_ref_count(eb, dref),
5226                                         0, num_bytes);
5227                         break;
5228                 case BTRFS_SHARED_DATA_REF_KEY:
5229                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5230                         add_data_backref(extent_cache, key.objectid, offset,
5231                                         0, 0, 0,
5232                                         btrfs_shared_data_ref_count(eb, sref),
5233                                         0, num_bytes);
5234                         break;
5235                 default:
5236                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5237                                 key.objectid, key.type, num_bytes);
5238                         goto out;
5239                 }
5240                 ptr += btrfs_extent_inline_ref_size(type);
5241         }
5242         WARN_ON(ptr > end);
5243 out:
5244         return 0;
5245 }
5246
5247 static int check_cache_range(struct btrfs_root *root,
5248                              struct btrfs_block_group_cache *cache,
5249                              u64 offset, u64 bytes)
5250 {
5251         struct btrfs_free_space *entry;
5252         u64 *logical;
5253         u64 bytenr;
5254         int stripe_len;
5255         int i, nr, ret;
5256
5257         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5258                 bytenr = btrfs_sb_offset(i);
5259                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5260                                        cache->key.objectid, bytenr, 0,
5261                                        &logical, &nr, &stripe_len);
5262                 if (ret)
5263                         return ret;
5264
5265                 while (nr--) {
5266                         if (logical[nr] + stripe_len <= offset)
5267                                 continue;
5268                         if (offset + bytes <= logical[nr])
5269                                 continue;
5270                         if (logical[nr] == offset) {
5271                                 if (stripe_len >= bytes) {
5272                                         kfree(logical);
5273                                         return 0;
5274                                 }
5275                                 bytes -= stripe_len;
5276                                 offset += stripe_len;
5277                         } else if (logical[nr] < offset) {
5278                                 if (logical[nr] + stripe_len >=
5279                                     offset + bytes) {
5280                                         kfree(logical);
5281                                         return 0;
5282                                 }
5283                                 bytes = (offset + bytes) -
5284                                         (logical[nr] + stripe_len);
5285                                 offset = logical[nr] + stripe_len;
5286                         } else {
5287                                 /*
5288                                  * Could be tricky, the super may land in the
5289                                  * middle of the area we're checking.  First
5290                                  * check the easiest case, it's at the end.
5291                                  */
5292                                 if (logical[nr] + stripe_len >=
5293                                     bytes + offset) {
5294                                         bytes = logical[nr] - offset;
5295                                         continue;
5296                                 }
5297
5298                                 /* Check the left side */
5299                                 ret = check_cache_range(root, cache,
5300                                                         offset,
5301                                                         logical[nr] - offset);
5302                                 if (ret) {
5303                                         kfree(logical);
5304                                         return ret;
5305                                 }
5306
5307                                 /* Now we continue with the right side */
5308                                 bytes = (offset + bytes) -
5309                                         (logical[nr] + stripe_len);
5310                                 offset = logical[nr] + stripe_len;
5311                         }
5312                 }
5313
5314                 kfree(logical);
5315         }
5316
5317         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5318         if (!entry) {
5319                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5320                         offset, offset+bytes);
5321                 return -EINVAL;
5322         }
5323
5324         if (entry->offset != offset) {
5325                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5326                         entry->offset);
5327                 return -EINVAL;
5328         }
5329
5330         if (entry->bytes != bytes) {
5331                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5332                         bytes, entry->bytes, offset);
5333                 return -EINVAL;
5334         }
5335
5336         unlink_free_space(cache->free_space_ctl, entry);
5337         free(entry);
5338         return 0;
5339 }
5340
5341 static int verify_space_cache(struct btrfs_root *root,
5342                               struct btrfs_block_group_cache *cache)
5343 {
5344         struct btrfs_path *path;
5345         struct extent_buffer *leaf;
5346         struct btrfs_key key;
5347         u64 last;
5348         int ret = 0;
5349
5350         path = btrfs_alloc_path();
5351         if (!path)
5352                 return -ENOMEM;
5353
5354         root = root->fs_info->extent_root;
5355
5356         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5357
5358         key.objectid = last;
5359         key.offset = 0;
5360         key.type = BTRFS_EXTENT_ITEM_KEY;
5361
5362         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5363         if (ret < 0)
5364                 goto out;
5365         ret = 0;
5366         while (1) {
5367                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5368                         ret = btrfs_next_leaf(root, path);
5369                         if (ret < 0)
5370                                 goto out;
5371                         if (ret > 0) {
5372                                 ret = 0;
5373                                 break;
5374                         }
5375                 }
5376                 leaf = path->nodes[0];
5377                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5378                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5379                         break;
5380                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5381                     key.type != BTRFS_METADATA_ITEM_KEY) {
5382                         path->slots[0]++;
5383                         continue;
5384                 }
5385
5386                 if (last == key.objectid) {
5387                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5388                                 last = key.objectid + key.offset;
5389                         else
5390                                 last = key.objectid + root->leafsize;
5391                         path->slots[0]++;
5392                         continue;
5393                 }
5394
5395                 ret = check_cache_range(root, cache, last,
5396                                         key.objectid - last);
5397                 if (ret)
5398                         break;
5399                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5400                         last = key.objectid + key.offset;
5401                 else
5402                         last = key.objectid + root->leafsize;
5403                 path->slots[0]++;
5404         }
5405
5406         if (last < cache->key.objectid + cache->key.offset)
5407                 ret = check_cache_range(root, cache, last,
5408                                         cache->key.objectid +
5409                                         cache->key.offset - last);
5410
5411 out:
5412         btrfs_free_path(path);
5413
5414         if (!ret &&
5415             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5416                 fprintf(stderr, "There are still entries left in the space "
5417                         "cache\n");
5418                 ret = -EINVAL;
5419         }
5420
5421         return ret;
5422 }
5423
5424 static int check_space_cache(struct btrfs_root *root)
5425 {
5426         struct btrfs_block_group_cache *cache;
5427         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5428         int ret;
5429         int error = 0;
5430
5431         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5432             btrfs_super_generation(root->fs_info->super_copy) !=
5433             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5434                 printf("cache and super generation don't match, space cache "
5435                        "will be invalidated\n");
5436                 return 0;
5437         }
5438
5439         if (ctx.progress_enabled) {
5440                 ctx.tp = TASK_FREE_SPACE;
5441                 task_start(ctx.info);
5442         }
5443
5444         while (1) {
5445                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5446                 if (!cache)
5447                         break;
5448
5449                 start = cache->key.objectid + cache->key.offset;
5450                 if (!cache->free_space_ctl) {
5451                         if (btrfs_init_free_space_ctl(cache,
5452                                                       root->sectorsize)) {
5453                                 ret = -ENOMEM;
5454                                 break;
5455                         }
5456                 } else {
5457                         btrfs_remove_free_space_cache(cache);
5458                 }
5459
5460                 ret = load_free_space_cache(root->fs_info, cache);
5461                 if (!ret)
5462                         continue;
5463
5464                 ret = verify_space_cache(root, cache);
5465                 if (ret) {
5466                         fprintf(stderr, "cache appears valid but isnt %Lu\n",
5467                                 cache->key.objectid);
5468                         error++;
5469                 }
5470         }
5471
5472         task_stop(ctx.info);
5473
5474         return error ? -EINVAL : 0;
5475 }
5476
5477 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5478                         u64 num_bytes, unsigned long leaf_offset,
5479                         struct extent_buffer *eb) {
5480
5481         u64 offset = 0;
5482         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5483         char *data;
5484         unsigned long csum_offset;
5485         u32 csum;
5486         u32 csum_expected;
5487         u64 read_len;
5488         u64 data_checked = 0;
5489         u64 tmp;
5490         int ret = 0;
5491         int mirror;
5492         int num_copies;
5493
5494         if (num_bytes % root->sectorsize)
5495                 return -EINVAL;
5496
5497         data = malloc(num_bytes);
5498         if (!data)
5499                 return -ENOMEM;
5500
5501         while (offset < num_bytes) {
5502                 mirror = 0;
5503 again:
5504                 read_len = num_bytes - offset;
5505                 /* read as much space once a time */
5506                 ret = read_extent_data(root, data + offset,
5507                                 bytenr + offset, &read_len, mirror);
5508                 if (ret)
5509                         goto out;
5510                 data_checked = 0;
5511                 /* verify every 4k data's checksum */
5512                 while (data_checked < read_len) {
5513                         csum = ~(u32)0;
5514                         tmp = offset + data_checked;
5515
5516                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5517                                                csum, root->sectorsize);
5518                         btrfs_csum_final(csum, (char *)&csum);
5519
5520                         csum_offset = leaf_offset +
5521                                  tmp / root->sectorsize * csum_size;
5522                         read_extent_buffer(eb, (char *)&csum_expected,
5523                                            csum_offset, csum_size);
5524                         /* try another mirror */
5525                         if (csum != csum_expected) {
5526                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5527                                                 mirror, bytenr + tmp,
5528                                                 csum, csum_expected);
5529                                 num_copies = btrfs_num_copies(
5530                                                 &root->fs_info->mapping_tree,
5531                                                 bytenr, num_bytes);
5532                                 if (mirror < num_copies - 1) {
5533                                         mirror += 1;
5534                                         goto again;
5535                                 }
5536                         }
5537                         data_checked += root->sectorsize;
5538                 }
5539                 offset += read_len;
5540         }
5541 out:
5542         free(data);
5543         return ret;
5544 }
5545
5546 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5547                                u64 num_bytes)
5548 {
5549         struct btrfs_path *path;
5550         struct extent_buffer *leaf;
5551         struct btrfs_key key;
5552         int ret;
5553
5554         path = btrfs_alloc_path();
5555         if (!path) {
5556                 fprintf(stderr, "Error allocing path\n");
5557                 return -ENOMEM;
5558         }
5559
5560         key.objectid = bytenr;
5561         key.type = BTRFS_EXTENT_ITEM_KEY;
5562         key.offset = (u64)-1;
5563
5564 again:
5565         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5566                                 0, 0);
5567         if (ret < 0) {
5568                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5569                 btrfs_free_path(path);
5570                 return ret;
5571         } else if (ret) {
5572                 if (path->slots[0] > 0) {
5573                         path->slots[0]--;
5574                 } else {
5575                         ret = btrfs_prev_leaf(root, path);
5576                         if (ret < 0) {
5577                                 goto out;
5578                         } else if (ret > 0) {
5579                                 ret = 0;
5580                                 goto out;
5581                         }
5582                 }
5583         }
5584
5585         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5586
5587         /*
5588          * Block group items come before extent items if they have the same
5589          * bytenr, so walk back one more just in case.  Dear future traveler,
5590          * first congrats on mastering time travel.  Now if it's not too much
5591          * trouble could you go back to 2006 and tell Chris to make the
5592          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5593          * EXTENT_ITEM_KEY please?
5594          */
5595         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5596                 if (path->slots[0] > 0) {
5597                         path->slots[0]--;
5598                 } else {
5599                         ret = btrfs_prev_leaf(root, path);
5600                         if (ret < 0) {
5601                                 goto out;
5602                         } else if (ret > 0) {
5603                                 ret = 0;
5604                                 goto out;
5605                         }
5606                 }
5607                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5608         }
5609
5610         while (num_bytes) {
5611                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5612                         ret = btrfs_next_leaf(root, path);
5613                         if (ret < 0) {
5614                                 fprintf(stderr, "Error going to next leaf "
5615                                         "%d\n", ret);
5616                                 btrfs_free_path(path);
5617                                 return ret;
5618                         } else if (ret) {
5619                                 break;
5620                         }
5621                 }
5622                 leaf = path->nodes[0];
5623                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5624                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5625                         path->slots[0]++;
5626                         continue;
5627                 }
5628                 if (key.objectid + key.offset < bytenr) {
5629                         path->slots[0]++;
5630                         continue;
5631                 }
5632                 if (key.objectid > bytenr + num_bytes)
5633                         break;
5634
5635                 if (key.objectid == bytenr) {
5636                         if (key.offset >= num_bytes) {
5637                                 num_bytes = 0;
5638                                 break;
5639                         }
5640                         num_bytes -= key.offset;
5641                         bytenr += key.offset;
5642                 } else if (key.objectid < bytenr) {
5643                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5644                                 num_bytes = 0;
5645                                 break;
5646                         }
5647                         num_bytes = (bytenr + num_bytes) -
5648                                 (key.objectid + key.offset);
5649                         bytenr = key.objectid + key.offset;
5650                 } else {
5651                         if (key.objectid + key.offset < bytenr + num_bytes) {
5652                                 u64 new_start = key.objectid + key.offset;
5653                                 u64 new_bytes = bytenr + num_bytes - new_start;
5654
5655                                 /*
5656                                  * Weird case, the extent is in the middle of
5657                                  * our range, we'll have to search one side
5658                                  * and then the other.  Not sure if this happens
5659                                  * in real life, but no harm in coding it up
5660                                  * anyway just in case.
5661                                  */
5662                                 btrfs_release_path(path);
5663                                 ret = check_extent_exists(root, new_start,
5664                                                           new_bytes);
5665                                 if (ret) {
5666                                         fprintf(stderr, "Right section didn't "
5667                                                 "have a record\n");
5668                                         break;
5669                                 }
5670                                 num_bytes = key.objectid - bytenr;
5671                                 goto again;
5672                         }
5673                         num_bytes = key.objectid - bytenr;
5674                 }
5675                 path->slots[0]++;
5676         }
5677         ret = 0;
5678
5679 out:
5680         if (num_bytes && !ret) {
5681                 fprintf(stderr, "There are no extents for csum range "
5682                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5683                 ret = 1;
5684         }
5685
5686         btrfs_free_path(path);
5687         return ret;
5688 }
5689
5690 static int check_csums(struct btrfs_root *root)
5691 {
5692         struct btrfs_path *path;
5693         struct extent_buffer *leaf;
5694         struct btrfs_key key;
5695         u64 offset = 0, num_bytes = 0;
5696         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5697         int errors = 0;
5698         int ret;
5699         u64 data_len;
5700         unsigned long leaf_offset;
5701
5702         root = root->fs_info->csum_root;
5703         if (!extent_buffer_uptodate(root->node)) {
5704                 fprintf(stderr, "No valid csum tree found\n");
5705                 return -ENOENT;
5706         }
5707
5708         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5709         key.type = BTRFS_EXTENT_CSUM_KEY;
5710         key.offset = 0;
5711
5712         path = btrfs_alloc_path();
5713         if (!path)
5714                 return -ENOMEM;
5715
5716         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5717         if (ret < 0) {
5718                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5719                 btrfs_free_path(path);
5720                 return ret;
5721         }
5722
5723         if (ret > 0 && path->slots[0])
5724                 path->slots[0]--;
5725         ret = 0;
5726
5727         while (1) {
5728                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5729                         ret = btrfs_next_leaf(root, path);
5730                         if (ret < 0) {
5731                                 fprintf(stderr, "Error going to next leaf "
5732                                         "%d\n", ret);
5733                                 break;
5734                         }
5735                         if (ret)
5736                                 break;
5737                 }
5738                 leaf = path->nodes[0];
5739
5740                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5741                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5742                         path->slots[0]++;
5743                         continue;
5744                 }
5745
5746                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5747                               csum_size) * root->sectorsize;
5748                 if (!check_data_csum)
5749                         goto skip_csum_check;
5750                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5751                 ret = check_extent_csums(root, key.offset, data_len,
5752                                          leaf_offset, leaf);
5753                 if (ret)
5754                         break;
5755 skip_csum_check:
5756                 if (!num_bytes) {
5757                         offset = key.offset;
5758                 } else if (key.offset != offset + num_bytes) {
5759                         ret = check_extent_exists(root, offset, num_bytes);
5760                         if (ret) {
5761                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5762                                         "there is no extent record\n",
5763                                         offset, offset+num_bytes);
5764                                 errors++;
5765                         }
5766                         offset = key.offset;
5767                         num_bytes = 0;
5768                 }
5769                 num_bytes += data_len;
5770                 path->slots[0]++;
5771         }
5772
5773         btrfs_free_path(path);
5774         return errors;
5775 }
5776
5777 static int is_dropped_key(struct btrfs_key *key,
5778                           struct btrfs_key *drop_key) {
5779         if (key->objectid < drop_key->objectid)
5780                 return 1;
5781         else if (key->objectid == drop_key->objectid) {
5782                 if (key->type < drop_key->type)
5783                         return 1;
5784                 else if (key->type == drop_key->type) {
5785                         if (key->offset < drop_key->offset)
5786                                 return 1;
5787                 }
5788         }
5789         return 0;
5790 }
5791
5792 /*
5793  * Here are the rules for FULL_BACKREF.
5794  *
5795  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5796  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5797  *      FULL_BACKREF set.
5798  * 3) We cow'ed the block walking down a reloc tree.  This is impossible to tell
5799  *    if it happened after the relocation occurred since we'll have dropped the
5800  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5801  *    have no real way to know for sure.
5802  *
5803  * We process the blocks one root at a time, and we start from the lowest root
5804  * objectid and go to the highest.  So we can just lookup the owner backref for
5805  * the record and if we don't find it then we know it doesn't exist and we have
5806  * a FULL BACKREF.
5807  *
5808  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5809  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5810  * be set or not and then we can check later once we've gathered all the refs.
5811  */
5812 static int calc_extent_flag(struct btrfs_root *root,
5813                            struct cache_tree *extent_cache,
5814                            struct extent_buffer *buf,
5815                            struct root_item_record *ri,
5816                            u64 *flags)
5817 {
5818         struct extent_record *rec;
5819         struct cache_extent *cache;
5820         struct tree_backref *tback;
5821         u64 owner = 0;
5822
5823         cache = lookup_cache_extent(extent_cache, buf->start, 1);
5824         /* we have added this extent before */
5825         BUG_ON(!cache);
5826         rec = container_of(cache, struct extent_record, cache);
5827
5828         /*
5829          * Except file/reloc tree, we can not have
5830          * FULL BACKREF MODE
5831          */
5832         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5833                 goto normal;
5834         /*
5835          * root node
5836          */
5837         if (buf->start == ri->bytenr)
5838                 goto normal;
5839
5840         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5841                 goto full_backref;
5842
5843         owner = btrfs_header_owner(buf);
5844         if (owner == ri->objectid)
5845                 goto normal;
5846
5847         tback = find_tree_backref(rec, 0, owner);
5848         if (!tback)
5849                 goto full_backref;
5850 normal:
5851         *flags = 0;
5852         if (rec->flag_block_full_backref != -1 &&
5853             rec->flag_block_full_backref != 0)
5854                 rec->bad_full_backref = 1;
5855         return 0;
5856 full_backref:
5857         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5858         if (rec->flag_block_full_backref != -1 &&
5859             rec->flag_block_full_backref != 1)
5860                 rec->bad_full_backref = 1;
5861         return 0;
5862 }
5863
5864 static int run_next_block(struct btrfs_root *root,
5865                           struct block_info *bits,
5866                           int bits_nr,
5867                           u64 *last,
5868                           struct cache_tree *pending,
5869                           struct cache_tree *seen,
5870                           struct cache_tree *reada,
5871                           struct cache_tree *nodes,
5872                           struct cache_tree *extent_cache,
5873                           struct cache_tree *chunk_cache,
5874                           struct rb_root *dev_cache,
5875                           struct block_group_tree *block_group_cache,
5876                           struct device_extent_tree *dev_extent_cache,
5877                           struct root_item_record *ri)
5878 {
5879         struct extent_buffer *buf;
5880         struct extent_record *rec = NULL;
5881         u64 bytenr;
5882         u32 size;
5883         u64 parent;
5884         u64 owner;
5885         u64 flags;
5886         u64 ptr;
5887         u64 gen = 0;
5888         int ret = 0;
5889         int i;
5890         int nritems;
5891         struct btrfs_key key;
5892         struct cache_extent *cache;
5893         int reada_bits;
5894
5895         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5896                                     bits_nr, &reada_bits);
5897         if (nritems == 0)
5898                 return 1;
5899
5900         if (!reada_bits) {
5901                 for(i = 0; i < nritems; i++) {
5902                         ret = add_cache_extent(reada, bits[i].start,
5903                                                bits[i].size);
5904                         if (ret == -EEXIST)
5905                                 continue;
5906
5907                         /* fixme, get the parent transid */
5908                         readahead_tree_block(root, bits[i].start,
5909                                              bits[i].size, 0);
5910                 }
5911         }
5912         *last = bits[0].start;
5913         bytenr = bits[0].start;
5914         size = bits[0].size;
5915
5916         cache = lookup_cache_extent(pending, bytenr, size);
5917         if (cache) {
5918                 remove_cache_extent(pending, cache);
5919                 free(cache);
5920         }
5921         cache = lookup_cache_extent(reada, bytenr, size);
5922         if (cache) {
5923                 remove_cache_extent(reada, cache);
5924                 free(cache);
5925         }
5926         cache = lookup_cache_extent(nodes, bytenr, size);
5927         if (cache) {
5928                 remove_cache_extent(nodes, cache);
5929                 free(cache);
5930         }
5931         cache = lookup_cache_extent(extent_cache, bytenr, size);
5932         if (cache) {
5933                 rec = container_of(cache, struct extent_record, cache);
5934                 gen = rec->parent_generation;
5935         }
5936
5937         /* fixme, get the real parent transid */
5938         buf = read_tree_block(root, bytenr, size, gen);
5939         if (!extent_buffer_uptodate(buf)) {
5940                 record_bad_block_io(root->fs_info,
5941                                     extent_cache, bytenr, size);
5942                 goto out;
5943         }
5944
5945         nritems = btrfs_header_nritems(buf);
5946
5947         flags = 0;
5948         if (!init_extent_tree) {
5949                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5950                                        btrfs_header_level(buf), 1, NULL,
5951                                        &flags);
5952                 if (ret < 0) {
5953                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5954                         if (ret < 0) {
5955                                 fprintf(stderr, "Couldn't calc extent flags\n");
5956                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5957                         }
5958                 }
5959         } else {
5960                 flags = 0;
5961                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5962                 if (ret < 0) {
5963                         fprintf(stderr, "Couldn't calc extent flags\n");
5964                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5965                 }
5966         }
5967
5968         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5969                 if (ri != NULL &&
5970                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5971                     ri->objectid == btrfs_header_owner(buf)) {
5972                         /*
5973                          * Ok we got to this block from it's original owner and
5974                          * we have FULL_BACKREF set.  Relocation can leave
5975                          * converted blocks over so this is altogether possible,
5976                          * however it's not possible if the generation > the
5977                          * last snapshot, so check for this case.
5978                          */
5979                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5980                             btrfs_header_generation(buf) > ri->last_snapshot) {
5981                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5982                                 rec->bad_full_backref = 1;
5983                         }
5984                 }
5985         } else {
5986                 if (ri != NULL &&
5987                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5988                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5989                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5990                         rec->bad_full_backref = 1;
5991                 }
5992         }
5993
5994         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5995                 rec->flag_block_full_backref = 1;
5996                 parent = bytenr;
5997                 owner = 0;
5998         } else {
5999                 rec->flag_block_full_backref = 0;
6000                 parent = 0;
6001                 owner = btrfs_header_owner(buf);
6002         }
6003
6004         ret = check_block(root, extent_cache, buf, flags);
6005         if (ret)
6006                 goto out;
6007
6008         if (btrfs_is_leaf(buf)) {
6009                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6010                 for (i = 0; i < nritems; i++) {
6011                         struct btrfs_file_extent_item *fi;
6012                         btrfs_item_key_to_cpu(buf, &key, i);
6013                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6014                                 process_extent_item(root, extent_cache, buf,
6015                                                     i);
6016                                 continue;
6017                         }
6018                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6019                                 process_extent_item(root, extent_cache, buf,
6020                                                     i);
6021                                 continue;
6022                         }
6023                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6024                                 total_csum_bytes +=
6025                                         btrfs_item_size_nr(buf, i);
6026                                 continue;
6027                         }
6028                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6029                                 process_chunk_item(chunk_cache, &key, buf, i);
6030                                 continue;
6031                         }
6032                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6033                                 process_device_item(dev_cache, &key, buf, i);
6034                                 continue;
6035                         }
6036                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6037                                 process_block_group_item(block_group_cache,
6038                                         &key, buf, i);
6039                                 continue;
6040                         }
6041                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6042                                 process_device_extent_item(dev_extent_cache,
6043                                         &key, buf, i);
6044                                 continue;
6045
6046                         }
6047                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6048 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6049                                 process_extent_ref_v0(extent_cache, buf, i);
6050 #else
6051                                 BUG();
6052 #endif
6053                                 continue;
6054                         }
6055
6056                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6057                                 add_tree_backref(extent_cache, key.objectid, 0,
6058                                                  key.offset, 0);
6059                                 continue;
6060                         }
6061                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6062                                 add_tree_backref(extent_cache, key.objectid,
6063                                                  key.offset, 0, 0);
6064                                 continue;
6065                         }
6066                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6067                                 struct btrfs_extent_data_ref *ref;
6068                                 ref = btrfs_item_ptr(buf, i,
6069                                                 struct btrfs_extent_data_ref);
6070                                 add_data_backref(extent_cache,
6071                                         key.objectid, 0,
6072                                         btrfs_extent_data_ref_root(buf, ref),
6073                                         btrfs_extent_data_ref_objectid(buf,
6074                                                                        ref),
6075                                         btrfs_extent_data_ref_offset(buf, ref),
6076                                         btrfs_extent_data_ref_count(buf, ref),
6077                                         0, root->sectorsize);
6078                                 continue;
6079                         }
6080                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6081                                 struct btrfs_shared_data_ref *ref;
6082                                 ref = btrfs_item_ptr(buf, i,
6083                                                 struct btrfs_shared_data_ref);
6084                                 add_data_backref(extent_cache,
6085                                         key.objectid, key.offset, 0, 0, 0,
6086                                         btrfs_shared_data_ref_count(buf, ref),
6087                                         0, root->sectorsize);
6088                                 continue;
6089                         }
6090                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6091                                 struct bad_item *bad;
6092
6093                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6094                                         continue;
6095                                 if (!owner)
6096                                         continue;
6097                                 bad = malloc(sizeof(struct bad_item));
6098                                 if (!bad)
6099                                         continue;
6100                                 INIT_LIST_HEAD(&bad->list);
6101                                 memcpy(&bad->key, &key,
6102                                        sizeof(struct btrfs_key));
6103                                 bad->root_id = owner;
6104                                 list_add_tail(&bad->list, &delete_items);
6105                                 continue;
6106                         }
6107                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6108                                 continue;
6109                         fi = btrfs_item_ptr(buf, i,
6110                                             struct btrfs_file_extent_item);
6111                         if (btrfs_file_extent_type(buf, fi) ==
6112                             BTRFS_FILE_EXTENT_INLINE)
6113                                 continue;
6114                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6115                                 continue;
6116
6117                         data_bytes_allocated +=
6118                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6119                         if (data_bytes_allocated < root->sectorsize) {
6120                                 abort();
6121                         }
6122                         data_bytes_referenced +=
6123                                 btrfs_file_extent_num_bytes(buf, fi);
6124                         add_data_backref(extent_cache,
6125                                 btrfs_file_extent_disk_bytenr(buf, fi),
6126                                 parent, owner, key.objectid, key.offset -
6127                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6128                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6129                 }
6130         } else {
6131                 int level;
6132                 struct btrfs_key first_key;
6133
6134                 first_key.objectid = 0;
6135
6136                 if (nritems > 0)
6137                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6138                 level = btrfs_header_level(buf);
6139                 for (i = 0; i < nritems; i++) {
6140                         ptr = btrfs_node_blockptr(buf, i);
6141                         size = btrfs_level_size(root, level - 1);
6142                         btrfs_node_key_to_cpu(buf, &key, i);
6143                         if (ri != NULL) {
6144                                 if ((level == ri->drop_level)
6145                                     && is_dropped_key(&key, &ri->drop_key)) {
6146                                         continue;
6147                                 }
6148                         }
6149                         ret = add_extent_rec(extent_cache, &key,
6150                                              btrfs_node_ptr_generation(buf, i),
6151                                              ptr, size, 0, 0, 1, 0, 1, 0,
6152                                              size);
6153                         BUG_ON(ret);
6154
6155                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6156
6157                         if (level > 1) {
6158                                 add_pending(nodes, seen, ptr, size);
6159                         } else {
6160                                 add_pending(pending, seen, ptr, size);
6161                         }
6162                 }
6163                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6164                                       nritems) * sizeof(struct btrfs_key_ptr);
6165         }
6166         total_btree_bytes += buf->len;
6167         if (fs_root_objectid(btrfs_header_owner(buf)))
6168                 total_fs_tree_bytes += buf->len;
6169         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6170                 total_extent_tree_bytes += buf->len;
6171         if (!found_old_backref &&
6172             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6173             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6174             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6175                 found_old_backref = 1;
6176 out:
6177         free_extent_buffer(buf);
6178         return ret;
6179 }
6180
6181 static int add_root_to_pending(struct extent_buffer *buf,
6182                                struct cache_tree *extent_cache,
6183                                struct cache_tree *pending,
6184                                struct cache_tree *seen,
6185                                struct cache_tree *nodes,
6186                                u64 objectid)
6187 {
6188         if (btrfs_header_level(buf) > 0)
6189                 add_pending(nodes, seen, buf->start, buf->len);
6190         else
6191                 add_pending(pending, seen, buf->start, buf->len);
6192         add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6193                        0, 1, 1, 0, 1, 0, buf->len);
6194
6195         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6196             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6197                 add_tree_backref(extent_cache, buf->start, buf->start,
6198                                  0, 1);
6199         else
6200                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6201         return 0;
6202 }
6203
6204 /* as we fix the tree, we might be deleting blocks that
6205  * we're tracking for repair.  This hook makes sure we
6206  * remove any backrefs for blocks as we are fixing them.
6207  */
6208 static int free_extent_hook(struct btrfs_trans_handle *trans,
6209                             struct btrfs_root *root,
6210                             u64 bytenr, u64 num_bytes, u64 parent,
6211                             u64 root_objectid, u64 owner, u64 offset,
6212                             int refs_to_drop)
6213 {
6214         struct extent_record *rec;
6215         struct cache_extent *cache;
6216         int is_data;
6217         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6218
6219         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6220         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6221         if (!cache)
6222                 return 0;
6223
6224         rec = container_of(cache, struct extent_record, cache);
6225         if (is_data) {
6226                 struct data_backref *back;
6227                 back = find_data_backref(rec, parent, root_objectid, owner,
6228                                          offset, 1, bytenr, num_bytes);
6229                 if (!back)
6230                         goto out;
6231                 if (back->node.found_ref) {
6232                         back->found_ref -= refs_to_drop;
6233                         if (rec->refs)
6234                                 rec->refs -= refs_to_drop;
6235                 }
6236                 if (back->node.found_extent_tree) {
6237                         back->num_refs -= refs_to_drop;
6238                         if (rec->extent_item_refs)
6239                                 rec->extent_item_refs -= refs_to_drop;
6240                 }
6241                 if (back->found_ref == 0)
6242                         back->node.found_ref = 0;
6243                 if (back->num_refs == 0)
6244                         back->node.found_extent_tree = 0;
6245
6246                 if (!back->node.found_extent_tree && back->node.found_ref) {
6247                         list_del(&back->node.list);
6248                         free(back);
6249                 }
6250         } else {
6251                 struct tree_backref *back;
6252                 back = find_tree_backref(rec, parent, root_objectid);
6253                 if (!back)
6254                         goto out;
6255                 if (back->node.found_ref) {
6256                         if (rec->refs)
6257                                 rec->refs--;
6258                         back->node.found_ref = 0;
6259                 }
6260                 if (back->node.found_extent_tree) {
6261                         if (rec->extent_item_refs)
6262                                 rec->extent_item_refs--;
6263                         back->node.found_extent_tree = 0;
6264                 }
6265                 if (!back->node.found_extent_tree && back->node.found_ref) {
6266                         list_del(&back->node.list);
6267                         free(back);
6268                 }
6269         }
6270         maybe_free_extent_rec(extent_cache, rec);
6271 out:
6272         return 0;
6273 }
6274
6275 static int delete_extent_records(struct btrfs_trans_handle *trans,
6276                                  struct btrfs_root *root,
6277                                  struct btrfs_path *path,
6278                                  u64 bytenr, u64 new_len)
6279 {
6280         struct btrfs_key key;
6281         struct btrfs_key found_key;
6282         struct extent_buffer *leaf;
6283         int ret;
6284         int slot;
6285
6286
6287         key.objectid = bytenr;
6288         key.type = (u8)-1;
6289         key.offset = (u64)-1;
6290
6291         while(1) {
6292                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6293                                         &key, path, 0, 1);
6294                 if (ret < 0)
6295                         break;
6296
6297                 if (ret > 0) {
6298                         ret = 0;
6299                         if (path->slots[0] == 0)
6300                                 break;
6301                         path->slots[0]--;
6302                 }
6303                 ret = 0;
6304
6305                 leaf = path->nodes[0];
6306                 slot = path->slots[0];
6307
6308                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6309                 if (found_key.objectid != bytenr)
6310                         break;
6311
6312                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6313                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6314                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6315                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6316                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6317                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6318                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6319                         btrfs_release_path(path);
6320                         if (found_key.type == 0) {
6321                                 if (found_key.offset == 0)
6322                                         break;
6323                                 key.offset = found_key.offset - 1;
6324                                 key.type = found_key.type;
6325                         }
6326                         key.type = found_key.type - 1;
6327                         key.offset = (u64)-1;
6328                         continue;
6329                 }
6330
6331                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6332                         found_key.objectid, found_key.type, found_key.offset);
6333
6334                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6335                 if (ret)
6336                         break;
6337                 btrfs_release_path(path);
6338
6339                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6340                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6341                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6342                                 found_key.offset : root->leafsize;
6343
6344                         ret = btrfs_update_block_group(trans, root, bytenr,
6345                                                        bytes, 0, 0);
6346                         if (ret)
6347                                 break;
6348                 }
6349         }
6350
6351         btrfs_release_path(path);
6352         return ret;
6353 }
6354
6355 /*
6356  * for a single backref, this will allocate a new extent
6357  * and add the backref to it.
6358  */
6359 static int record_extent(struct btrfs_trans_handle *trans,
6360                          struct btrfs_fs_info *info,
6361                          struct btrfs_path *path,
6362                          struct extent_record *rec,
6363                          struct extent_backref *back,
6364                          int allocated, u64 flags)
6365 {
6366         int ret;
6367         struct btrfs_root *extent_root = info->extent_root;
6368         struct extent_buffer *leaf;
6369         struct btrfs_key ins_key;
6370         struct btrfs_extent_item *ei;
6371         struct tree_backref *tback;
6372         struct data_backref *dback;
6373         struct btrfs_tree_block_info *bi;
6374
6375         if (!back->is_data)
6376                 rec->max_size = max_t(u64, rec->max_size,
6377                                     info->extent_root->leafsize);
6378
6379         if (!allocated) {
6380                 u32 item_size = sizeof(*ei);
6381
6382                 if (!back->is_data)
6383                         item_size += sizeof(*bi);
6384
6385                 ins_key.objectid = rec->start;
6386                 ins_key.offset = rec->max_size;
6387                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6388
6389                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6390                                         &ins_key, item_size);
6391                 if (ret)
6392                         goto fail;
6393
6394                 leaf = path->nodes[0];
6395                 ei = btrfs_item_ptr(leaf, path->slots[0],
6396                                     struct btrfs_extent_item);
6397
6398                 btrfs_set_extent_refs(leaf, ei, 0);
6399                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6400
6401                 if (back->is_data) {
6402                         btrfs_set_extent_flags(leaf, ei,
6403                                                BTRFS_EXTENT_FLAG_DATA);
6404                 } else {
6405                         struct btrfs_disk_key copy_key;;
6406
6407                         tback = (struct tree_backref *)back;
6408                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6409                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6410                                              sizeof(*bi));
6411
6412                         btrfs_set_disk_key_objectid(&copy_key,
6413                                                     rec->info_objectid);
6414                         btrfs_set_disk_key_type(&copy_key, 0);
6415                         btrfs_set_disk_key_offset(&copy_key, 0);
6416
6417                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6418                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6419
6420                         btrfs_set_extent_flags(leaf, ei,
6421                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6422                 }
6423
6424                 btrfs_mark_buffer_dirty(leaf);
6425                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6426                                                rec->max_size, 1, 0);
6427                 if (ret)
6428                         goto fail;
6429                 btrfs_release_path(path);
6430         }
6431
6432         if (back->is_data) {
6433                 u64 parent;
6434                 int i;
6435
6436                 dback = (struct data_backref *)back;
6437                 if (back->full_backref)
6438                         parent = dback->parent;
6439                 else
6440                         parent = 0;
6441
6442                 for (i = 0; i < dback->found_ref; i++) {
6443                         /* if parent != 0, we're doing a full backref
6444                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6445                          * just makes the backref allocator create a data
6446                          * backref
6447                          */
6448                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6449                                                    rec->start, rec->max_size,
6450                                                    parent,
6451                                                    dback->root,
6452                                                    parent ?
6453                                                    BTRFS_FIRST_FREE_OBJECTID :
6454                                                    dback->owner,
6455                                                    dback->offset);
6456                         if (ret)
6457                                 break;
6458                 }
6459                 fprintf(stderr, "adding new data backref"
6460                                 " on %llu %s %llu owner %llu"
6461                                 " offset %llu found %d\n",
6462                                 (unsigned long long)rec->start,
6463                                 back->full_backref ?
6464                                 "parent" : "root",
6465                                 back->full_backref ?
6466                                 (unsigned long long)parent :
6467                                 (unsigned long long)dback->root,
6468                                 (unsigned long long)dback->owner,
6469                                 (unsigned long long)dback->offset,
6470                                 dback->found_ref);
6471         } else {
6472                 u64 parent;
6473
6474                 tback = (struct tree_backref *)back;
6475                 if (back->full_backref)
6476                         parent = tback->parent;
6477                 else
6478                         parent = 0;
6479
6480                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6481                                            rec->start, rec->max_size,
6482                                            parent, tback->root, 0, 0);
6483                 fprintf(stderr, "adding new tree backref on "
6484                         "start %llu len %llu parent %llu root %llu\n",
6485                         rec->start, rec->max_size, parent, tback->root);
6486         }
6487 fail:
6488         btrfs_release_path(path);
6489         return ret;
6490 }
6491
6492 struct extent_entry {
6493         u64 bytenr;
6494         u64 bytes;
6495         int count;
6496         int broken;
6497         struct list_head list;
6498 };
6499
6500 static struct extent_entry *find_entry(struct list_head *entries,
6501                                        u64 bytenr, u64 bytes)
6502 {
6503         struct extent_entry *entry = NULL;
6504
6505         list_for_each_entry(entry, entries, list) {
6506                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6507                         return entry;
6508         }
6509
6510         return NULL;
6511 }
6512
6513 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6514 {
6515         struct extent_entry *entry, *best = NULL, *prev = NULL;
6516
6517         list_for_each_entry(entry, entries, list) {
6518                 if (!prev) {
6519                         prev = entry;
6520                         continue;
6521                 }
6522
6523                 /*
6524                  * If there are as many broken entries as entries then we know
6525                  * not to trust this particular entry.
6526                  */
6527                 if (entry->broken == entry->count)
6528                         continue;
6529
6530                 /*
6531                  * If our current entry == best then we can't be sure our best
6532                  * is really the best, so we need to keep searching.
6533                  */
6534                 if (best && best->count == entry->count) {
6535                         prev = entry;
6536                         best = NULL;
6537                         continue;
6538                 }
6539
6540                 /* Prev == entry, not good enough, have to keep searching */
6541                 if (!prev->broken && prev->count == entry->count)
6542                         continue;
6543
6544                 if (!best)
6545                         best = (prev->count > entry->count) ? prev : entry;
6546                 else if (best->count < entry->count)
6547                         best = entry;
6548                 prev = entry;
6549         }
6550
6551         return best;
6552 }
6553
6554 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6555                       struct data_backref *dback, struct extent_entry *entry)
6556 {
6557         struct btrfs_trans_handle *trans;
6558         struct btrfs_root *root;
6559         struct btrfs_file_extent_item *fi;
6560         struct extent_buffer *leaf;
6561         struct btrfs_key key;
6562         u64 bytenr, bytes;
6563         int ret, err;
6564
6565         key.objectid = dback->root;
6566         key.type = BTRFS_ROOT_ITEM_KEY;
6567         key.offset = (u64)-1;
6568         root = btrfs_read_fs_root(info, &key);
6569         if (IS_ERR(root)) {
6570                 fprintf(stderr, "Couldn't find root for our ref\n");
6571                 return -EINVAL;
6572         }
6573
6574         /*
6575          * The backref points to the original offset of the extent if it was
6576          * split, so we need to search down to the offset we have and then walk
6577          * forward until we find the backref we're looking for.
6578          */
6579         key.objectid = dback->owner;
6580         key.type = BTRFS_EXTENT_DATA_KEY;
6581         key.offset = dback->offset;
6582         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6583         if (ret < 0) {
6584                 fprintf(stderr, "Error looking up ref %d\n", ret);
6585                 return ret;
6586         }
6587
6588         while (1) {
6589                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6590                         ret = btrfs_next_leaf(root, path);
6591                         if (ret) {
6592                                 fprintf(stderr, "Couldn't find our ref, next\n");
6593                                 return -EINVAL;
6594                         }
6595                 }
6596                 leaf = path->nodes[0];
6597                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6598                 if (key.objectid != dback->owner ||
6599                     key.type != BTRFS_EXTENT_DATA_KEY) {
6600                         fprintf(stderr, "Couldn't find our ref, search\n");
6601                         return -EINVAL;
6602                 }
6603                 fi = btrfs_item_ptr(leaf, path->slots[0],
6604                                     struct btrfs_file_extent_item);
6605                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6606                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6607
6608                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6609                         break;
6610                 path->slots[0]++;
6611         }
6612
6613         btrfs_release_path(path);
6614
6615         trans = btrfs_start_transaction(root, 1);
6616         if (IS_ERR(trans))
6617                 return PTR_ERR(trans);
6618
6619         /*
6620          * Ok we have the key of the file extent we want to fix, now we can cow
6621          * down to the thing and fix it.
6622          */
6623         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6624         if (ret < 0) {
6625                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6626                         key.objectid, key.type, key.offset, ret);
6627                 goto out;
6628         }
6629         if (ret > 0) {
6630                 fprintf(stderr, "Well that's odd, we just found this key "
6631                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6632                         key.offset);
6633                 ret = -EINVAL;
6634                 goto out;
6635         }
6636         leaf = path->nodes[0];
6637         fi = btrfs_item_ptr(leaf, path->slots[0],
6638                             struct btrfs_file_extent_item);
6639
6640         if (btrfs_file_extent_compression(leaf, fi) &&
6641             dback->disk_bytenr != entry->bytenr) {
6642                 fprintf(stderr, "Ref doesn't match the record start and is "
6643                         "compressed, please take a btrfs-image of this file "
6644                         "system and send it to a btrfs developer so they can "
6645                         "complete this functionality for bytenr %Lu\n",
6646                         dback->disk_bytenr);
6647                 ret = -EINVAL;
6648                 goto out;
6649         }
6650
6651         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6652                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6653         } else if (dback->disk_bytenr > entry->bytenr) {
6654                 u64 off_diff, offset;
6655
6656                 off_diff = dback->disk_bytenr - entry->bytenr;
6657                 offset = btrfs_file_extent_offset(leaf, fi);
6658                 if (dback->disk_bytenr + offset +
6659                     btrfs_file_extent_num_bytes(leaf, fi) >
6660                     entry->bytenr + entry->bytes) {
6661                         fprintf(stderr, "Ref is past the entry end, please "
6662                                 "take a btrfs-image of this file system and "
6663                                 "send it to a btrfs developer, ref %Lu\n",
6664                                 dback->disk_bytenr);
6665                         ret = -EINVAL;
6666                         goto out;
6667                 }
6668                 offset += off_diff;
6669                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6670                 btrfs_set_file_extent_offset(leaf, fi, offset);
6671         } else if (dback->disk_bytenr < entry->bytenr) {
6672                 u64 offset;
6673
6674                 offset = btrfs_file_extent_offset(leaf, fi);
6675                 if (dback->disk_bytenr + offset < entry->bytenr) {
6676                         fprintf(stderr, "Ref is before the entry start, please"
6677                                 " take a btrfs-image of this file system and "
6678                                 "send it to a btrfs developer, ref %Lu\n",
6679                                 dback->disk_bytenr);
6680                         ret = -EINVAL;
6681                         goto out;
6682                 }
6683
6684                 offset += dback->disk_bytenr;
6685                 offset -= entry->bytenr;
6686                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6687                 btrfs_set_file_extent_offset(leaf, fi, offset);
6688         }
6689
6690         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6691
6692         /*
6693          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6694          * only do this if we aren't using compression, otherwise it's a
6695          * trickier case.
6696          */
6697         if (!btrfs_file_extent_compression(leaf, fi))
6698                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6699         else
6700                 printf("ram bytes may be wrong?\n");
6701         btrfs_mark_buffer_dirty(leaf);
6702 out:
6703         err = btrfs_commit_transaction(trans, root);
6704         btrfs_release_path(path);
6705         return ret ? ret : err;
6706 }
6707
6708 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6709                            struct extent_record *rec)
6710 {
6711         struct extent_backref *back;
6712         struct data_backref *dback;
6713         struct extent_entry *entry, *best = NULL;
6714         LIST_HEAD(entries);
6715         int nr_entries = 0;
6716         int broken_entries = 0;
6717         int ret = 0;
6718         short mismatch = 0;
6719
6720         /*
6721          * Metadata is easy and the backrefs should always agree on bytenr and
6722          * size, if not we've got bigger issues.
6723          */
6724         if (rec->metadata)
6725                 return 0;
6726
6727         list_for_each_entry(back, &rec->backrefs, list) {
6728                 if (back->full_backref || !back->is_data)
6729                         continue;
6730
6731                 dback = (struct data_backref *)back;
6732
6733                 /*
6734                  * We only pay attention to backrefs that we found a real
6735                  * backref for.
6736                  */
6737                 if (dback->found_ref == 0)
6738                         continue;
6739
6740                 /*
6741                  * For now we only catch when the bytes don't match, not the
6742                  * bytenr.  We can easily do this at the same time, but I want
6743                  * to have a fs image to test on before we just add repair
6744                  * functionality willy-nilly so we know we won't screw up the
6745                  * repair.
6746                  */
6747
6748                 entry = find_entry(&entries, dback->disk_bytenr,
6749                                    dback->bytes);
6750                 if (!entry) {
6751                         entry = malloc(sizeof(struct extent_entry));
6752                         if (!entry) {
6753                                 ret = -ENOMEM;
6754                                 goto out;
6755                         }
6756                         memset(entry, 0, sizeof(*entry));
6757                         entry->bytenr = dback->disk_bytenr;
6758                         entry->bytes = dback->bytes;
6759                         list_add_tail(&entry->list, &entries);
6760                         nr_entries++;
6761                 }
6762
6763                 /*
6764                  * If we only have on entry we may think the entries agree when
6765                  * in reality they don't so we have to do some extra checking.
6766                  */
6767                 if (dback->disk_bytenr != rec->start ||
6768                     dback->bytes != rec->nr || back->broken)
6769                         mismatch = 1;
6770
6771                 if (back->broken) {
6772                         entry->broken++;
6773                         broken_entries++;
6774                 }
6775
6776                 entry->count++;
6777         }
6778
6779         /* Yay all the backrefs agree, carry on good sir */
6780         if (nr_entries <= 1 && !mismatch)
6781                 goto out;
6782
6783         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6784                 "%Lu\n", rec->start);
6785
6786         /*
6787          * First we want to see if the backrefs can agree amongst themselves who
6788          * is right, so figure out which one of the entries has the highest
6789          * count.
6790          */
6791         best = find_most_right_entry(&entries);
6792
6793         /*
6794          * Ok so we may have an even split between what the backrefs think, so
6795          * this is where we use the extent ref to see what it thinks.
6796          */
6797         if (!best) {
6798                 entry = find_entry(&entries, rec->start, rec->nr);
6799                 if (!entry && (!broken_entries || !rec->found_rec)) {
6800                         fprintf(stderr, "Backrefs don't agree with each other "
6801                                 "and extent record doesn't agree with anybody,"
6802                                 " so we can't fix bytenr %Lu bytes %Lu\n",
6803                                 rec->start, rec->nr);
6804                         ret = -EINVAL;
6805                         goto out;
6806                 } else if (!entry) {
6807                         /*
6808                          * Ok our backrefs were broken, we'll assume this is the
6809                          * correct value and add an entry for this range.
6810                          */
6811                         entry = malloc(sizeof(struct extent_entry));
6812                         if (!entry) {
6813                                 ret = -ENOMEM;
6814                                 goto out;
6815                         }
6816                         memset(entry, 0, sizeof(*entry));
6817                         entry->bytenr = rec->start;
6818                         entry->bytes = rec->nr;
6819                         list_add_tail(&entry->list, &entries);
6820                         nr_entries++;
6821                 }
6822                 entry->count++;
6823                 best = find_most_right_entry(&entries);
6824                 if (!best) {
6825                         fprintf(stderr, "Backrefs and extent record evenly "
6826                                 "split on who is right, this is going to "
6827                                 "require user input to fix bytenr %Lu bytes "
6828                                 "%Lu\n", rec->start, rec->nr);
6829                         ret = -EINVAL;
6830                         goto out;
6831                 }
6832         }
6833
6834         /*
6835          * I don't think this can happen currently as we'll abort() if we catch
6836          * this case higher up, but in case somebody removes that we still can't
6837          * deal with it properly here yet, so just bail out of that's the case.
6838          */
6839         if (best->bytenr != rec->start) {
6840                 fprintf(stderr, "Extent start and backref starts don't match, "
6841                         "please use btrfs-image on this file system and send "
6842                         "it to a btrfs developer so they can make fsck fix "
6843                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
6844                         rec->start, rec->nr);
6845                 ret = -EINVAL;
6846                 goto out;
6847         }
6848
6849         /*
6850          * Ok great we all agreed on an extent record, let's go find the real
6851          * references and fix up the ones that don't match.
6852          */
6853         list_for_each_entry(back, &rec->backrefs, list) {
6854                 if (back->full_backref || !back->is_data)
6855                         continue;
6856
6857                 dback = (struct data_backref *)back;
6858
6859                 /*
6860                  * Still ignoring backrefs that don't have a real ref attached
6861                  * to them.
6862                  */
6863                 if (dback->found_ref == 0)
6864                         continue;
6865
6866                 if (dback->bytes == best->bytes &&
6867                     dback->disk_bytenr == best->bytenr)
6868                         continue;
6869
6870                 ret = repair_ref(info, path, dback, best);
6871                 if (ret)
6872                         goto out;
6873         }
6874
6875         /*
6876          * Ok we messed with the actual refs, which means we need to drop our
6877          * entire cache and go back and rescan.  I know this is a huge pain and
6878          * adds a lot of extra work, but it's the only way to be safe.  Once all
6879          * the backrefs agree we may not need to do anything to the extent
6880          * record itself.
6881          */
6882         ret = -EAGAIN;
6883 out:
6884         while (!list_empty(&entries)) {
6885                 entry = list_entry(entries.next, struct extent_entry, list);
6886                 list_del_init(&entry->list);
6887                 free(entry);
6888         }
6889         return ret;
6890 }
6891
6892 static int process_duplicates(struct btrfs_root *root,
6893                               struct cache_tree *extent_cache,
6894                               struct extent_record *rec)
6895 {
6896         struct extent_record *good, *tmp;
6897         struct cache_extent *cache;
6898         int ret;
6899
6900         /*
6901          * If we found a extent record for this extent then return, or if we
6902          * have more than one duplicate we are likely going to need to delete
6903          * something.
6904          */
6905         if (rec->found_rec || rec->num_duplicates > 1)
6906                 return 0;
6907
6908         /* Shouldn't happen but just in case */
6909         BUG_ON(!rec->num_duplicates);
6910
6911         /*
6912          * So this happens if we end up with a backref that doesn't match the
6913          * actual extent entry.  So either the backref is bad or the extent
6914          * entry is bad.  Either way we want to have the extent_record actually
6915          * reflect what we found in the extent_tree, so we need to take the
6916          * duplicate out and use that as the extent_record since the only way we
6917          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6918          */
6919         remove_cache_extent(extent_cache, &rec->cache);
6920
6921         good = list_entry(rec->dups.next, struct extent_record, list);
6922         list_del_init(&good->list);
6923         INIT_LIST_HEAD(&good->backrefs);
6924         INIT_LIST_HEAD(&good->dups);
6925         good->cache.start = good->start;
6926         good->cache.size = good->nr;
6927         good->content_checked = 0;
6928         good->owner_ref_checked = 0;
6929         good->num_duplicates = 0;
6930         good->refs = rec->refs;
6931         list_splice_init(&rec->backrefs, &good->backrefs);
6932         while (1) {
6933                 cache = lookup_cache_extent(extent_cache, good->start,
6934                                             good->nr);
6935                 if (!cache)
6936                         break;
6937                 tmp = container_of(cache, struct extent_record, cache);
6938
6939                 /*
6940                  * If we find another overlapping extent and it's found_rec is
6941                  * set then it's a duplicate and we need to try and delete
6942                  * something.
6943                  */
6944                 if (tmp->found_rec || tmp->num_duplicates > 0) {
6945                         if (list_empty(&good->list))
6946                                 list_add_tail(&good->list,
6947                                               &duplicate_extents);
6948                         good->num_duplicates += tmp->num_duplicates + 1;
6949                         list_splice_init(&tmp->dups, &good->dups);
6950                         list_del_init(&tmp->list);
6951                         list_add_tail(&tmp->list, &good->dups);
6952                         remove_cache_extent(extent_cache, &tmp->cache);
6953                         continue;
6954                 }
6955
6956                 /*
6957                  * Ok we have another non extent item backed extent rec, so lets
6958                  * just add it to this extent and carry on like we did above.
6959                  */
6960                 good->refs += tmp->refs;
6961                 list_splice_init(&tmp->backrefs, &good->backrefs);
6962                 remove_cache_extent(extent_cache, &tmp->cache);
6963                 free(tmp);
6964         }
6965         ret = insert_cache_extent(extent_cache, &good->cache);
6966         BUG_ON(ret);
6967         free(rec);
6968         return good->num_duplicates ? 0 : 1;
6969 }
6970
6971 static int delete_duplicate_records(struct btrfs_root *root,
6972                                     struct extent_record *rec)
6973 {
6974         struct btrfs_trans_handle *trans;
6975         LIST_HEAD(delete_list);
6976         struct btrfs_path *path;
6977         struct extent_record *tmp, *good, *n;
6978         int nr_del = 0;
6979         int ret = 0, err;
6980         struct btrfs_key key;
6981
6982         path = btrfs_alloc_path();
6983         if (!path) {
6984                 ret = -ENOMEM;
6985                 goto out;
6986         }
6987
6988         good = rec;
6989         /* Find the record that covers all of the duplicates. */
6990         list_for_each_entry(tmp, &rec->dups, list) {
6991                 if (good->start < tmp->start)
6992                         continue;
6993                 if (good->nr > tmp->nr)
6994                         continue;
6995
6996                 if (tmp->start + tmp->nr < good->start + good->nr) {
6997                         fprintf(stderr, "Ok we have overlapping extents that "
6998                                 "aren't completely covered by eachother, this "
6999                                 "is going to require more careful thought.  "
7000                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7001                                 tmp->start, tmp->nr, good->start, good->nr);
7002                         abort();
7003                 }
7004                 good = tmp;
7005         }
7006
7007         if (good != rec)
7008                 list_add_tail(&rec->list, &delete_list);
7009
7010         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7011                 if (tmp == good)
7012                         continue;
7013                 list_move_tail(&tmp->list, &delete_list);
7014         }
7015
7016         root = root->fs_info->extent_root;
7017         trans = btrfs_start_transaction(root, 1);
7018         if (IS_ERR(trans)) {
7019                 ret = PTR_ERR(trans);
7020                 goto out;
7021         }
7022
7023         list_for_each_entry(tmp, &delete_list, list) {
7024                 if (tmp->found_rec == 0)
7025                         continue;
7026                 key.objectid = tmp->start;
7027                 key.type = BTRFS_EXTENT_ITEM_KEY;
7028                 key.offset = tmp->nr;
7029
7030                 /* Shouldn't happen but just in case */
7031                 if (tmp->metadata) {
7032                         fprintf(stderr, "Well this shouldn't happen, extent "
7033                                 "record overlaps but is metadata? "
7034                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7035                         abort();
7036                 }
7037
7038                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7039                 if (ret) {
7040                         if (ret > 0)
7041                                 ret = -EINVAL;
7042                         break;
7043                 }
7044                 ret = btrfs_del_item(trans, root, path);
7045                 if (ret)
7046                         break;
7047                 btrfs_release_path(path);
7048                 nr_del++;
7049         }
7050         err = btrfs_commit_transaction(trans, root);
7051         if (err && !ret)
7052                 ret = err;
7053 out:
7054         while (!list_empty(&delete_list)) {
7055                 tmp = list_entry(delete_list.next, struct extent_record, list);
7056                 list_del_init(&tmp->list);
7057                 if (tmp == rec)
7058                         continue;
7059                 free(tmp);
7060         }
7061
7062         while (!list_empty(&rec->dups)) {
7063                 tmp = list_entry(rec->dups.next, struct extent_record, list);
7064                 list_del_init(&tmp->list);
7065                 free(tmp);
7066         }
7067
7068         btrfs_free_path(path);
7069
7070         if (!ret && !nr_del)
7071                 rec->num_duplicates = 0;
7072
7073         return ret ? ret : nr_del;
7074 }
7075
7076 static int find_possible_backrefs(struct btrfs_fs_info *info,
7077                                   struct btrfs_path *path,
7078                                   struct cache_tree *extent_cache,
7079                                   struct extent_record *rec)
7080 {
7081         struct btrfs_root *root;
7082         struct extent_backref *back;
7083         struct data_backref *dback;
7084         struct cache_extent *cache;
7085         struct btrfs_file_extent_item *fi;
7086         struct btrfs_key key;
7087         u64 bytenr, bytes;
7088         int ret;
7089
7090         list_for_each_entry(back, &rec->backrefs, list) {
7091                 /* Don't care about full backrefs (poor unloved backrefs) */
7092                 if (back->full_backref || !back->is_data)
7093                         continue;
7094
7095                 dback = (struct data_backref *)back;
7096
7097                 /* We found this one, we don't need to do a lookup */
7098                 if (dback->found_ref)
7099                         continue;
7100
7101                 key.objectid = dback->root;
7102                 key.type = BTRFS_ROOT_ITEM_KEY;
7103                 key.offset = (u64)-1;
7104
7105                 root = btrfs_read_fs_root(info, &key);
7106
7107                 /* No root, definitely a bad ref, skip */
7108                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7109                         continue;
7110                 /* Other err, exit */
7111                 if (IS_ERR(root))
7112                         return PTR_ERR(root);
7113
7114                 key.objectid = dback->owner;
7115                 key.type = BTRFS_EXTENT_DATA_KEY;
7116                 key.offset = dback->offset;
7117                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7118                 if (ret) {
7119                         btrfs_release_path(path);
7120                         if (ret < 0)
7121                                 return ret;
7122                         /* Didn't find it, we can carry on */
7123                         ret = 0;
7124                         continue;
7125                 }
7126
7127                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7128                                     struct btrfs_file_extent_item);
7129                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7130                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7131                 btrfs_release_path(path);
7132                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7133                 if (cache) {
7134                         struct extent_record *tmp;
7135                         tmp = container_of(cache, struct extent_record, cache);
7136
7137                         /*
7138                          * If we found an extent record for the bytenr for this
7139                          * particular backref then we can't add it to our
7140                          * current extent record.  We only want to add backrefs
7141                          * that don't have a corresponding extent item in the
7142                          * extent tree since they likely belong to this record
7143                          * and we need to fix it if it doesn't match bytenrs.
7144                          */
7145                         if  (tmp->found_rec)
7146                                 continue;
7147                 }
7148
7149                 dback->found_ref += 1;
7150                 dback->disk_bytenr = bytenr;
7151                 dback->bytes = bytes;
7152
7153                 /*
7154                  * Set this so the verify backref code knows not to trust the
7155                  * values in this backref.
7156                  */
7157                 back->broken = 1;
7158         }
7159
7160         return 0;
7161 }
7162
7163 /*
7164  * Record orphan data ref into corresponding root.
7165  *
7166  * Return 0 if the extent item contains data ref and recorded.
7167  * Return 1 if the extent item contains no useful data ref
7168  *   On that case, it may contains only shared_dataref or metadata backref
7169  *   or the file extent exists(this should be handled by the extent bytenr
7170  *   recovery routine)
7171  * Return <0 if something goes wrong.
7172  */
7173 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7174                                       struct extent_record *rec)
7175 {
7176         struct btrfs_key key;
7177         struct btrfs_root *dest_root;
7178         struct extent_backref *back;
7179         struct data_backref *dback;
7180         struct orphan_data_extent *orphan;
7181         struct btrfs_path *path;
7182         int recorded_data_ref = 0;
7183         int ret = 0;
7184
7185         if (rec->metadata)
7186                 return 1;
7187         path = btrfs_alloc_path();
7188         if (!path)
7189                 return -ENOMEM;
7190         list_for_each_entry(back, &rec->backrefs, list) {
7191                 if (back->full_backref || !back->is_data ||
7192                     !back->found_extent_tree)
7193                         continue;
7194                 dback = (struct data_backref *)back;
7195                 if (dback->found_ref)
7196                         continue;
7197                 key.objectid = dback->root;
7198                 key.type = BTRFS_ROOT_ITEM_KEY;
7199                 key.offset = (u64)-1;
7200
7201                 dest_root = btrfs_read_fs_root(fs_info, &key);
7202
7203                 /* For non-exist root we just skip it */
7204                 if (IS_ERR(dest_root) || !dest_root)
7205                         continue;
7206
7207                 key.objectid = dback->owner;
7208                 key.type = BTRFS_EXTENT_DATA_KEY;
7209                 key.offset = dback->offset;
7210
7211                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7212                 /*
7213                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7214                  * we need to record it for inode/file extent rebuild.
7215                  * For ret > 0, we record it only for file extent rebuild.
7216                  * For ret == 0, the file extent exists but only bytenr
7217                  * mismatch, let the original bytenr fix routine to handle,
7218                  * don't record it.
7219                  */
7220                 if (ret == 0)
7221                         continue;
7222                 ret = 0;
7223                 orphan = malloc(sizeof(*orphan));
7224                 if (!orphan) {
7225                         ret = -ENOMEM;
7226                         goto out;
7227                 }
7228                 INIT_LIST_HEAD(&orphan->list);
7229                 orphan->root = dback->root;
7230                 orphan->objectid = dback->owner;
7231                 orphan->offset = dback->offset;
7232                 orphan->disk_bytenr = rec->cache.start;
7233                 orphan->disk_len = rec->cache.size;
7234                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7235                 recorded_data_ref = 1;
7236         }
7237 out:
7238         btrfs_free_path(path);
7239         if (!ret)
7240                 return !recorded_data_ref;
7241         else
7242                 return ret;
7243 }
7244
7245 /*
7246  * when an incorrect extent item is found, this will delete
7247  * all of the existing entries for it and recreate them
7248  * based on what the tree scan found.
7249  */
7250 static int fixup_extent_refs(struct btrfs_fs_info *info,
7251                              struct cache_tree *extent_cache,
7252                              struct extent_record *rec)
7253 {
7254         struct btrfs_trans_handle *trans = NULL;
7255         int ret;
7256         struct btrfs_path *path;
7257         struct list_head *cur = rec->backrefs.next;
7258         struct cache_extent *cache;
7259         struct extent_backref *back;
7260         int allocated = 0;
7261         u64 flags = 0;
7262
7263         if (rec->flag_block_full_backref)
7264                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7265
7266         path = btrfs_alloc_path();
7267         if (!path)
7268                 return -ENOMEM;
7269
7270         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7271                 /*
7272                  * Sometimes the backrefs themselves are so broken they don't
7273                  * get attached to any meaningful rec, so first go back and
7274                  * check any of our backrefs that we couldn't find and throw
7275                  * them into the list if we find the backref so that
7276                  * verify_backrefs can figure out what to do.
7277                  */
7278                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7279                 if (ret < 0)
7280                         goto out;
7281         }
7282
7283         /* step one, make sure all of the backrefs agree */
7284         ret = verify_backrefs(info, path, rec);
7285         if (ret < 0)
7286                 goto out;
7287
7288         trans = btrfs_start_transaction(info->extent_root, 1);
7289         if (IS_ERR(trans)) {
7290                 ret = PTR_ERR(trans);
7291                 goto out;
7292         }
7293
7294         /* step two, delete all the existing records */
7295         ret = delete_extent_records(trans, info->extent_root, path,
7296                                     rec->start, rec->max_size);
7297
7298         if (ret < 0)
7299                 goto out;
7300
7301         /* was this block corrupt?  If so, don't add references to it */
7302         cache = lookup_cache_extent(info->corrupt_blocks,
7303                                     rec->start, rec->max_size);
7304         if (cache) {
7305                 ret = 0;
7306                 goto out;
7307         }
7308
7309         /* step three, recreate all the refs we did find */
7310         while(cur != &rec->backrefs) {
7311                 back = list_entry(cur, struct extent_backref, list);
7312                 cur = cur->next;
7313
7314                 /*
7315                  * if we didn't find any references, don't create a
7316                  * new extent record
7317                  */
7318                 if (!back->found_ref)
7319                         continue;
7320
7321                 rec->bad_full_backref = 0;
7322                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7323                 allocated = 1;
7324
7325                 if (ret)
7326                         goto out;
7327         }
7328 out:
7329         if (trans) {
7330                 int err = btrfs_commit_transaction(trans, info->extent_root);
7331                 if (!ret)
7332                         ret = err;
7333         }
7334
7335         btrfs_free_path(path);
7336         return ret;
7337 }
7338
7339 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7340                               struct extent_record *rec)
7341 {
7342         struct btrfs_trans_handle *trans;
7343         struct btrfs_root *root = fs_info->extent_root;
7344         struct btrfs_path *path;
7345         struct btrfs_extent_item *ei;
7346         struct btrfs_key key;
7347         u64 flags;
7348         int ret = 0;
7349
7350         key.objectid = rec->start;
7351         if (rec->metadata) {
7352                 key.type = BTRFS_METADATA_ITEM_KEY;
7353                 key.offset = rec->info_level;
7354         } else {
7355                 key.type = BTRFS_EXTENT_ITEM_KEY;
7356                 key.offset = rec->max_size;
7357         }
7358
7359         path = btrfs_alloc_path();
7360         if (!path)
7361                 return -ENOMEM;
7362
7363         trans = btrfs_start_transaction(root, 0);
7364         if (IS_ERR(trans)) {
7365                 btrfs_free_path(path);
7366                 return PTR_ERR(trans);
7367         }
7368
7369         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7370         if (ret < 0) {
7371                 btrfs_free_path(path);
7372                 btrfs_commit_transaction(trans, root);
7373                 return ret;
7374         } else if (ret) {
7375                 fprintf(stderr, "Didn't find extent for %llu\n",
7376                         (unsigned long long)rec->start);
7377                 btrfs_free_path(path);
7378                 btrfs_commit_transaction(trans, root);
7379                 return -ENOENT;
7380         }
7381
7382         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7383                             struct btrfs_extent_item);
7384         flags = btrfs_extent_flags(path->nodes[0], ei);
7385         if (rec->flag_block_full_backref) {
7386                 fprintf(stderr, "setting full backref on %llu\n",
7387                         (unsigned long long)key.objectid);
7388                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7389         } else {
7390                 fprintf(stderr, "clearing full backref on %llu\n",
7391                         (unsigned long long)key.objectid);
7392                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7393         }
7394         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7395         btrfs_mark_buffer_dirty(path->nodes[0]);
7396         btrfs_free_path(path);
7397         return btrfs_commit_transaction(trans, root);
7398 }
7399
7400 /* right now we only prune from the extent allocation tree */
7401 static int prune_one_block(struct btrfs_trans_handle *trans,
7402                            struct btrfs_fs_info *info,
7403                            struct btrfs_corrupt_block *corrupt)
7404 {
7405         int ret;
7406         struct btrfs_path path;
7407         struct extent_buffer *eb;
7408         u64 found;
7409         int slot;
7410         int nritems;
7411         int level = corrupt->level + 1;
7412
7413         btrfs_init_path(&path);
7414 again:
7415         /* we want to stop at the parent to our busted block */
7416         path.lowest_level = level;
7417
7418         ret = btrfs_search_slot(trans, info->extent_root,
7419                                 &corrupt->key, &path, -1, 1);
7420
7421         if (ret < 0)
7422                 goto out;
7423
7424         eb = path.nodes[level];
7425         if (!eb) {
7426                 ret = -ENOENT;
7427                 goto out;
7428         }
7429
7430         /*
7431          * hopefully the search gave us the block we want to prune,
7432          * lets try that first
7433          */
7434         slot = path.slots[level];
7435         found =  btrfs_node_blockptr(eb, slot);
7436         if (found == corrupt->cache.start)
7437                 goto del_ptr;
7438
7439         nritems = btrfs_header_nritems(eb);
7440
7441         /* the search failed, lets scan this node and hope we find it */
7442         for (slot = 0; slot < nritems; slot++) {
7443                 found =  btrfs_node_blockptr(eb, slot);
7444                 if (found == corrupt->cache.start)
7445                         goto del_ptr;
7446         }
7447         /*
7448          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7449          * to this block
7450          */
7451         if (eb == info->extent_root->node) {
7452                 ret = -ENOENT;
7453                 goto out;
7454         } else {
7455                 level++;
7456                 btrfs_release_path(&path);
7457                 goto again;
7458         }
7459
7460 del_ptr:
7461         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7462         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7463
7464 out:
7465         btrfs_release_path(&path);
7466         return ret;
7467 }
7468
7469 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7470 {
7471         struct btrfs_trans_handle *trans = NULL;
7472         struct cache_extent *cache;
7473         struct btrfs_corrupt_block *corrupt;
7474
7475         while (1) {
7476                 cache = search_cache_extent(info->corrupt_blocks, 0);
7477                 if (!cache)
7478                         break;
7479                 if (!trans) {
7480                         trans = btrfs_start_transaction(info->extent_root, 1);
7481                         if (IS_ERR(trans))
7482                                 return PTR_ERR(trans);
7483                 }
7484                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7485                 prune_one_block(trans, info, corrupt);
7486                 remove_cache_extent(info->corrupt_blocks, cache);
7487         }
7488         if (trans)
7489                 return btrfs_commit_transaction(trans, info->extent_root);
7490         return 0;
7491 }
7492
7493 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7494 {
7495         struct btrfs_block_group_cache *cache;
7496         u64 start, end;
7497         int ret;
7498
7499         while (1) {
7500                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7501                                             &start, &end, EXTENT_DIRTY);
7502                 if (ret)
7503                         break;
7504                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7505                                    GFP_NOFS);
7506         }
7507
7508         start = 0;
7509         while (1) {
7510                 cache = btrfs_lookup_first_block_group(fs_info, start);
7511                 if (!cache)
7512                         break;
7513                 if (cache->cached)
7514                         cache->cached = 0;
7515                 start = cache->key.objectid + cache->key.offset;
7516         }
7517 }
7518
7519 static int check_extent_refs(struct btrfs_root *root,
7520                              struct cache_tree *extent_cache)
7521 {
7522         struct extent_record *rec;
7523         struct cache_extent *cache;
7524         int err = 0;
7525         int ret = 0;
7526         int fixed = 0;
7527         int had_dups = 0;
7528         int recorded = 0;
7529
7530         if (repair) {
7531                 /*
7532                  * if we're doing a repair, we have to make sure
7533                  * we don't allocate from the problem extents.
7534                  * In the worst case, this will be all the
7535                  * extents in the FS
7536                  */
7537                 cache = search_cache_extent(extent_cache, 0);
7538                 while(cache) {
7539                         rec = container_of(cache, struct extent_record, cache);
7540                         set_extent_dirty(root->fs_info->excluded_extents,
7541                                          rec->start,
7542                                          rec->start + rec->max_size - 1,
7543                                          GFP_NOFS);
7544                         cache = next_cache_extent(cache);
7545                 }
7546
7547                 /* pin down all the corrupted blocks too */
7548                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7549                 while(cache) {
7550                         set_extent_dirty(root->fs_info->excluded_extents,
7551                                          cache->start,
7552                                          cache->start + cache->size - 1,
7553                                          GFP_NOFS);
7554                         cache = next_cache_extent(cache);
7555                 }
7556                 prune_corrupt_blocks(root->fs_info);
7557                 reset_cached_block_groups(root->fs_info);
7558         }
7559
7560         reset_cached_block_groups(root->fs_info);
7561
7562         /*
7563          * We need to delete any duplicate entries we find first otherwise we
7564          * could mess up the extent tree when we have backrefs that actually
7565          * belong to a different extent item and not the weird duplicate one.
7566          */
7567         while (repair && !list_empty(&duplicate_extents)) {
7568                 rec = list_entry(duplicate_extents.next, struct extent_record,
7569                                  list);
7570                 list_del_init(&rec->list);
7571
7572                 /* Sometimes we can find a backref before we find an actual
7573                  * extent, so we need to process it a little bit to see if there
7574                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7575                  * if this is a backref screwup.  If we need to delete stuff
7576                  * process_duplicates() will return 0, otherwise it will return
7577                  * 1 and we
7578                  */
7579                 if (process_duplicates(root, extent_cache, rec))
7580                         continue;
7581                 ret = delete_duplicate_records(root, rec);
7582                 if (ret < 0)
7583                         return ret;
7584                 /*
7585                  * delete_duplicate_records will return the number of entries
7586                  * deleted, so if it's greater than 0 then we know we actually
7587                  * did something and we need to remove.
7588                  */
7589                 if (ret)
7590                         had_dups = 1;
7591         }
7592
7593         if (had_dups)
7594                 return -EAGAIN;
7595
7596         while(1) {
7597                 int cur_err = 0;
7598
7599                 fixed = 0;
7600                 recorded = 0;
7601                 cache = search_cache_extent(extent_cache, 0);
7602                 if (!cache)
7603                         break;
7604                 rec = container_of(cache, struct extent_record, cache);
7605                 if (rec->num_duplicates) {
7606                         fprintf(stderr, "extent item %llu has multiple extent "
7607                                 "items\n", (unsigned long long)rec->start);
7608                         err = 1;
7609                         cur_err = 1;
7610                 }
7611
7612                 if (rec->refs != rec->extent_item_refs) {
7613                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7614                                 (unsigned long long)rec->start,
7615                                 (unsigned long long)rec->nr);
7616                         fprintf(stderr, "extent item %llu, found %llu\n",
7617                                 (unsigned long long)rec->extent_item_refs,
7618                                 (unsigned long long)rec->refs);
7619                         ret = record_orphan_data_extents(root->fs_info, rec);
7620                         if (ret < 0)
7621                                 goto repair_abort;
7622                         if (ret == 0) {
7623                                 recorded = 1;
7624                         } else {
7625                                 /*
7626                                  * we can't use the extent to repair file
7627                                  * extent, let the fallback method handle it.
7628                                  */
7629                                 if (!fixed && repair) {
7630                                         ret = fixup_extent_refs(
7631                                                         root->fs_info,
7632                                                         extent_cache, rec);
7633                                         if (ret)
7634                                                 goto repair_abort;
7635                                         fixed = 1;
7636                                 }
7637                         }
7638                         err = 1;
7639                         cur_err = 1;
7640                 }
7641                 if (all_backpointers_checked(rec, 1)) {
7642                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7643                                 (unsigned long long)rec->start,
7644                                 (unsigned long long)rec->nr);
7645
7646                         if (!fixed && !recorded && repair) {
7647                                 ret = fixup_extent_refs(root->fs_info,
7648                                                         extent_cache, rec);
7649                                 if (ret)
7650                                         goto repair_abort;
7651                                 fixed = 1;
7652                         }
7653                         cur_err = 1;
7654                         err = 1;
7655                 }
7656                 if (!rec->owner_ref_checked) {
7657                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7658                                 (unsigned long long)rec->start,
7659                                 (unsigned long long)rec->nr);
7660                         if (!fixed && !recorded && repair) {
7661                                 ret = fixup_extent_refs(root->fs_info,
7662                                                         extent_cache, rec);
7663                                 if (ret)
7664                                         goto repair_abort;
7665                                 fixed = 1;
7666                         }
7667                         err = 1;
7668                         cur_err = 1;
7669                 }
7670                 if (rec->bad_full_backref) {
7671                         fprintf(stderr, "bad full backref, on [%llu]\n",
7672                                 (unsigned long long)rec->start);
7673                         if (repair) {
7674                                 ret = fixup_extent_flags(root->fs_info, rec);
7675                                 if (ret)
7676                                         goto repair_abort;
7677                                 fixed = 1;
7678                         }
7679                         err = 1;
7680                         cur_err = 1;
7681                 }
7682                 /*
7683                  * Although it's not a extent ref's problem, we reuse this
7684                  * routine for error reporting.
7685                  * No repair function yet.
7686                  */
7687                 if (rec->crossing_stripes) {
7688                         fprintf(stderr,
7689                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7690                                 rec->start, rec->start + rec->max_size);
7691                         err = 1;
7692                         cur_err = 1;
7693                 }
7694
7695                 if (rec->wrong_chunk_type) {
7696                         fprintf(stderr,
7697                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7698                                 rec->start, rec->start + rec->max_size);
7699                         err = 1;
7700                         cur_err = 1;
7701                 }
7702
7703                 remove_cache_extent(extent_cache, cache);
7704                 free_all_extent_backrefs(rec);
7705                 if (!init_extent_tree && repair && (!cur_err || fixed))
7706                         clear_extent_dirty(root->fs_info->excluded_extents,
7707                                            rec->start,
7708                                            rec->start + rec->max_size - 1,
7709                                            GFP_NOFS);
7710                 free(rec);
7711         }
7712 repair_abort:
7713         if (repair) {
7714                 if (ret && ret != -EAGAIN) {
7715                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7716                         exit(1);
7717                 } else if (!ret) {
7718                         struct btrfs_trans_handle *trans;
7719
7720                         root = root->fs_info->extent_root;
7721                         trans = btrfs_start_transaction(root, 1);
7722                         if (IS_ERR(trans)) {
7723                                 ret = PTR_ERR(trans);
7724                                 goto repair_abort;
7725                         }
7726
7727                         btrfs_fix_block_accounting(trans, root);
7728                         ret = btrfs_commit_transaction(trans, root);
7729                         if (ret)
7730                                 goto repair_abort;
7731                 }
7732                 if (err)
7733                         fprintf(stderr, "repaired damaged extent references\n");
7734                 return ret;
7735         }
7736         return err;
7737 }
7738
7739 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7740 {
7741         u64 stripe_size;
7742
7743         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7744                 stripe_size = length;
7745                 stripe_size /= num_stripes;
7746         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7747                 stripe_size = length * 2;
7748                 stripe_size /= num_stripes;
7749         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7750                 stripe_size = length;
7751                 stripe_size /= (num_stripes - 1);
7752         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7753                 stripe_size = length;
7754                 stripe_size /= (num_stripes - 2);
7755         } else {
7756                 stripe_size = length;
7757         }
7758         return stripe_size;
7759 }
7760
7761 /*
7762  * Check the chunk with its block group/dev list ref:
7763  * Return 0 if all refs seems valid.
7764  * Return 1 if part of refs seems valid, need later check for rebuild ref
7765  * like missing block group and needs to search extent tree to rebuild them.
7766  * Return -1 if essential refs are missing and unable to rebuild.
7767  */
7768 static int check_chunk_refs(struct chunk_record *chunk_rec,
7769                             struct block_group_tree *block_group_cache,
7770                             struct device_extent_tree *dev_extent_cache,
7771                             int silent)
7772 {
7773         struct cache_extent *block_group_item;
7774         struct block_group_record *block_group_rec;
7775         struct cache_extent *dev_extent_item;
7776         struct device_extent_record *dev_extent_rec;
7777         u64 devid;
7778         u64 offset;
7779         u64 length;
7780         int metadump_v2 = 0;
7781         int i;
7782         int ret = 0;
7783
7784         block_group_item = lookup_cache_extent(&block_group_cache->tree,
7785                                                chunk_rec->offset,
7786                                                chunk_rec->length);
7787         if (block_group_item) {
7788                 block_group_rec = container_of(block_group_item,
7789                                                struct block_group_record,
7790                                                cache);
7791                 if (chunk_rec->length != block_group_rec->offset ||
7792                     chunk_rec->offset != block_group_rec->objectid ||
7793                     (!metadump_v2 &&
7794                      chunk_rec->type_flags != block_group_rec->flags)) {
7795                         if (!silent)
7796                                 fprintf(stderr,
7797                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7798                                         chunk_rec->objectid,
7799                                         chunk_rec->type,
7800                                         chunk_rec->offset,
7801                                         chunk_rec->length,
7802                                         chunk_rec->offset,
7803                                         chunk_rec->type_flags,
7804                                         block_group_rec->objectid,
7805                                         block_group_rec->type,
7806                                         block_group_rec->offset,
7807                                         block_group_rec->offset,
7808                                         block_group_rec->objectid,
7809                                         block_group_rec->flags);
7810                         ret = -1;
7811                 } else {
7812                         list_del_init(&block_group_rec->list);
7813                         chunk_rec->bg_rec = block_group_rec;
7814                 }
7815         } else {
7816                 if (!silent)
7817                         fprintf(stderr,
7818                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7819                                 chunk_rec->objectid,
7820                                 chunk_rec->type,
7821                                 chunk_rec->offset,
7822                                 chunk_rec->length,
7823                                 chunk_rec->offset,
7824                                 chunk_rec->type_flags);
7825                 ret = 1;
7826         }
7827
7828         if (metadump_v2)
7829                 return ret;
7830
7831         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7832                                     chunk_rec->num_stripes);
7833         for (i = 0; i < chunk_rec->num_stripes; ++i) {
7834                 devid = chunk_rec->stripes[i].devid;
7835                 offset = chunk_rec->stripes[i].offset;
7836                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7837                                                        devid, offset, length);
7838                 if (dev_extent_item) {
7839                         dev_extent_rec = container_of(dev_extent_item,
7840                                                 struct device_extent_record,
7841                                                 cache);
7842                         if (dev_extent_rec->objectid != devid ||
7843                             dev_extent_rec->offset != offset ||
7844                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
7845                             dev_extent_rec->length != length) {
7846                                 if (!silent)
7847                                         fprintf(stderr,
7848                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7849                                                 chunk_rec->objectid,
7850                                                 chunk_rec->type,
7851                                                 chunk_rec->offset,
7852                                                 chunk_rec->stripes[i].devid,
7853                                                 chunk_rec->stripes[i].offset,
7854                                                 dev_extent_rec->objectid,
7855                                                 dev_extent_rec->offset,
7856                                                 dev_extent_rec->length);
7857                                 ret = -1;
7858                         } else {
7859                                 list_move(&dev_extent_rec->chunk_list,
7860                                           &chunk_rec->dextents);
7861                         }
7862                 } else {
7863                         if (!silent)
7864                                 fprintf(stderr,
7865                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7866                                         chunk_rec->objectid,
7867                                         chunk_rec->type,
7868                                         chunk_rec->offset,
7869                                         chunk_rec->stripes[i].devid,
7870                                         chunk_rec->stripes[i].offset);
7871                         ret = -1;
7872                 }
7873         }
7874         return ret;
7875 }
7876
7877 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7878 int check_chunks(struct cache_tree *chunk_cache,
7879                  struct block_group_tree *block_group_cache,
7880                  struct device_extent_tree *dev_extent_cache,
7881                  struct list_head *good, struct list_head *bad,
7882                  struct list_head *rebuild, int silent)
7883 {
7884         struct cache_extent *chunk_item;
7885         struct chunk_record *chunk_rec;
7886         struct block_group_record *bg_rec;
7887         struct device_extent_record *dext_rec;
7888         int err;
7889         int ret = 0;
7890
7891         chunk_item = first_cache_extent(chunk_cache);
7892         while (chunk_item) {
7893                 chunk_rec = container_of(chunk_item, struct chunk_record,
7894                                          cache);
7895                 err = check_chunk_refs(chunk_rec, block_group_cache,
7896                                        dev_extent_cache, silent);
7897                 if (err < 0)
7898                         ret = err;
7899                 if (err == 0 && good)
7900                         list_add_tail(&chunk_rec->list, good);
7901                 if (err > 0 && rebuild)
7902                         list_add_tail(&chunk_rec->list, rebuild);
7903                 if (err < 0 && bad)
7904                         list_add_tail(&chunk_rec->list, bad);
7905                 chunk_item = next_cache_extent(chunk_item);
7906         }
7907
7908         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7909                 if (!silent)
7910                         fprintf(stderr,
7911                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7912                                 bg_rec->objectid,
7913                                 bg_rec->offset,
7914                                 bg_rec->flags);
7915                 if (!ret)
7916                         ret = 1;
7917         }
7918
7919         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7920                             chunk_list) {
7921                 if (!silent)
7922                         fprintf(stderr,
7923                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7924                                 dext_rec->objectid,
7925                                 dext_rec->offset,
7926                                 dext_rec->length);
7927                 if (!ret)
7928                         ret = 1;
7929         }
7930         return ret;
7931 }
7932
7933
7934 static int check_device_used(struct device_record *dev_rec,
7935                              struct device_extent_tree *dext_cache)
7936 {
7937         struct cache_extent *cache;
7938         struct device_extent_record *dev_extent_rec;
7939         u64 total_byte = 0;
7940
7941         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7942         while (cache) {
7943                 dev_extent_rec = container_of(cache,
7944                                               struct device_extent_record,
7945                                               cache);
7946                 if (dev_extent_rec->objectid != dev_rec->devid)
7947                         break;
7948
7949                 list_del_init(&dev_extent_rec->device_list);
7950                 total_byte += dev_extent_rec->length;
7951                 cache = next_cache_extent(cache);
7952         }
7953
7954         if (total_byte != dev_rec->byte_used) {
7955                 fprintf(stderr,
7956                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7957                         total_byte, dev_rec->byte_used, dev_rec->objectid,
7958                         dev_rec->type, dev_rec->offset);
7959                 return -1;
7960         } else {
7961                 return 0;
7962         }
7963 }
7964
7965 /* check btrfs_dev_item -> btrfs_dev_extent */
7966 static int check_devices(struct rb_root *dev_cache,
7967                          struct device_extent_tree *dev_extent_cache)
7968 {
7969         struct rb_node *dev_node;
7970         struct device_record *dev_rec;
7971         struct device_extent_record *dext_rec;
7972         int err;
7973         int ret = 0;
7974
7975         dev_node = rb_first(dev_cache);
7976         while (dev_node) {
7977                 dev_rec = container_of(dev_node, struct device_record, node);
7978                 err = check_device_used(dev_rec, dev_extent_cache);
7979                 if (err)
7980                         ret = err;
7981
7982                 dev_node = rb_next(dev_node);
7983         }
7984         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7985                             device_list) {
7986                 fprintf(stderr,
7987                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7988                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
7989                 if (!ret)
7990                         ret = 1;
7991         }
7992         return ret;
7993 }
7994
7995 static int add_root_item_to_list(struct list_head *head,
7996                                   u64 objectid, u64 bytenr, u64 last_snapshot,
7997                                   u8 level, u8 drop_level,
7998                                   int level_size, struct btrfs_key *drop_key)
7999 {
8000
8001         struct root_item_record *ri_rec;
8002         ri_rec = malloc(sizeof(*ri_rec));
8003         if (!ri_rec)
8004                 return -ENOMEM;
8005         ri_rec->bytenr = bytenr;
8006         ri_rec->objectid = objectid;
8007         ri_rec->level = level;
8008         ri_rec->level_size = level_size;
8009         ri_rec->drop_level = drop_level;
8010         ri_rec->last_snapshot = last_snapshot;
8011         if (drop_key)
8012                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8013         list_add_tail(&ri_rec->list, head);
8014
8015         return 0;
8016 }
8017
8018 static void free_root_item_list(struct list_head *list)
8019 {
8020         struct root_item_record *ri_rec;
8021
8022         while (!list_empty(list)) {
8023                 ri_rec = list_first_entry(list, struct root_item_record,
8024                                           list);
8025                 list_del_init(&ri_rec->list);
8026                 free(ri_rec);
8027         }
8028 }
8029
8030 static int deal_root_from_list(struct list_head *list,
8031                                struct btrfs_root *root,
8032                                struct block_info *bits,
8033                                int bits_nr,
8034                                struct cache_tree *pending,
8035                                struct cache_tree *seen,
8036                                struct cache_tree *reada,
8037                                struct cache_tree *nodes,
8038                                struct cache_tree *extent_cache,
8039                                struct cache_tree *chunk_cache,
8040                                struct rb_root *dev_cache,
8041                                struct block_group_tree *block_group_cache,
8042                                struct device_extent_tree *dev_extent_cache)
8043 {
8044         int ret = 0;
8045         u64 last;
8046
8047         while (!list_empty(list)) {
8048                 struct root_item_record *rec;
8049                 struct extent_buffer *buf;
8050                 rec = list_entry(list->next,
8051                                  struct root_item_record, list);
8052                 last = 0;
8053                 buf = read_tree_block(root->fs_info->tree_root,
8054                                       rec->bytenr, rec->level_size, 0);
8055                 if (!extent_buffer_uptodate(buf)) {
8056                         free_extent_buffer(buf);
8057                         ret = -EIO;
8058                         break;
8059                 }
8060                 add_root_to_pending(buf, extent_cache, pending,
8061                                     seen, nodes, rec->objectid);
8062                 /*
8063                  * To rebuild extent tree, we need deal with snapshot
8064                  * one by one, otherwise we deal with node firstly which
8065                  * can maximize readahead.
8066                  */
8067                 while (1) {
8068                         ret = run_next_block(root, bits, bits_nr, &last,
8069                                              pending, seen, reada, nodes,
8070                                              extent_cache, chunk_cache,
8071                                              dev_cache, block_group_cache,
8072                                              dev_extent_cache, rec);
8073                         if (ret != 0)
8074                                 break;
8075                 }
8076                 free_extent_buffer(buf);
8077                 list_del(&rec->list);
8078                 free(rec);
8079                 if (ret < 0)
8080                         break;
8081         }
8082         while (ret >= 0) {
8083                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8084                                      reada, nodes, extent_cache, chunk_cache,
8085                                      dev_cache, block_group_cache,
8086                                      dev_extent_cache, NULL);
8087                 if (ret != 0) {
8088                         if (ret > 0)
8089                                 ret = 0;
8090                         break;
8091                 }
8092         }
8093         return ret;
8094 }
8095
8096 static int check_chunks_and_extents(struct btrfs_root *root)
8097 {
8098         struct rb_root dev_cache;
8099         struct cache_tree chunk_cache;
8100         struct block_group_tree block_group_cache;
8101         struct device_extent_tree dev_extent_cache;
8102         struct cache_tree extent_cache;
8103         struct cache_tree seen;
8104         struct cache_tree pending;
8105         struct cache_tree reada;
8106         struct cache_tree nodes;
8107         struct extent_io_tree excluded_extents;
8108         struct cache_tree corrupt_blocks;
8109         struct btrfs_path path;
8110         struct btrfs_key key;
8111         struct btrfs_key found_key;
8112         int ret, err = 0;
8113         struct block_info *bits;
8114         int bits_nr;
8115         struct extent_buffer *leaf;
8116         int slot;
8117         struct btrfs_root_item ri;
8118         struct list_head dropping_trees;
8119         struct list_head normal_trees;
8120         struct btrfs_root *root1;
8121         u64 objectid;
8122         u32 level_size;
8123         u8 level;
8124
8125         dev_cache = RB_ROOT;
8126         cache_tree_init(&chunk_cache);
8127         block_group_tree_init(&block_group_cache);
8128         device_extent_tree_init(&dev_extent_cache);
8129
8130         cache_tree_init(&extent_cache);
8131         cache_tree_init(&seen);
8132         cache_tree_init(&pending);
8133         cache_tree_init(&nodes);
8134         cache_tree_init(&reada);
8135         cache_tree_init(&corrupt_blocks);
8136         extent_io_tree_init(&excluded_extents);
8137         INIT_LIST_HEAD(&dropping_trees);
8138         INIT_LIST_HEAD(&normal_trees);
8139
8140         if (repair) {
8141                 root->fs_info->excluded_extents = &excluded_extents;
8142                 root->fs_info->fsck_extent_cache = &extent_cache;
8143                 root->fs_info->free_extent_hook = free_extent_hook;
8144                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8145         }
8146
8147         bits_nr = 1024;
8148         bits = malloc(bits_nr * sizeof(struct block_info));
8149         if (!bits) {
8150                 perror("malloc");
8151                 exit(1);
8152         }
8153
8154         if (ctx.progress_enabled) {
8155                 ctx.tp = TASK_EXTENTS;
8156                 task_start(ctx.info);
8157         }
8158
8159 again:
8160         root1 = root->fs_info->tree_root;
8161         level = btrfs_header_level(root1->node);
8162         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8163                                     root1->node->start, 0, level, 0,
8164                                     btrfs_level_size(root1, level), NULL);
8165         if (ret < 0)
8166                 goto out;
8167         root1 = root->fs_info->chunk_root;
8168         level = btrfs_header_level(root1->node);
8169         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8170                                     root1->node->start, 0, level, 0,
8171                                     btrfs_level_size(root1, level), NULL);
8172         if (ret < 0)
8173                 goto out;
8174         btrfs_init_path(&path);
8175         key.offset = 0;
8176         key.objectid = 0;
8177         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8178         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8179                                         &key, &path, 0, 0);
8180         if (ret < 0)
8181                 goto out;
8182         while(1) {
8183                 leaf = path.nodes[0];
8184                 slot = path.slots[0];
8185                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8186                         ret = btrfs_next_leaf(root, &path);
8187                         if (ret != 0)
8188                                 break;
8189                         leaf = path.nodes[0];
8190                         slot = path.slots[0];
8191                 }
8192                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8193                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8194                         unsigned long offset;
8195                         u64 last_snapshot;
8196
8197                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8198                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8199                         last_snapshot = btrfs_root_last_snapshot(&ri);
8200                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8201                                 level = btrfs_root_level(&ri);
8202                                 level_size = btrfs_level_size(root, level);
8203                                 ret = add_root_item_to_list(&normal_trees,
8204                                                 found_key.objectid,
8205                                                 btrfs_root_bytenr(&ri),
8206                                                 last_snapshot, level,
8207                                                 0, level_size, NULL);
8208                                 if (ret < 0)
8209                                         goto out;
8210                         } else {
8211                                 level = btrfs_root_level(&ri);
8212                                 level_size = btrfs_level_size(root, level);
8213                                 objectid = found_key.objectid;
8214                                 btrfs_disk_key_to_cpu(&found_key,
8215                                                       &ri.drop_progress);
8216                                 ret = add_root_item_to_list(&dropping_trees,
8217                                                 objectid,
8218                                                 btrfs_root_bytenr(&ri),
8219                                                 last_snapshot, level,
8220                                                 ri.drop_level,
8221                                                 level_size, &found_key);
8222                                 if (ret < 0)
8223                                         goto out;
8224                         }
8225                 }
8226                 path.slots[0]++;
8227         }
8228         btrfs_release_path(&path);
8229
8230         /*
8231          * check_block can return -EAGAIN if it fixes something, please keep
8232          * this in mind when dealing with return values from these functions, if
8233          * we get -EAGAIN we want to fall through and restart the loop.
8234          */
8235         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8236                                   &seen, &reada, &nodes, &extent_cache,
8237                                   &chunk_cache, &dev_cache, &block_group_cache,
8238                                   &dev_extent_cache);
8239         if (ret < 0) {
8240                 if (ret == -EAGAIN)
8241                         goto loop;
8242                 goto out;
8243         }
8244         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8245                                   &pending, &seen, &reada, &nodes,
8246                                   &extent_cache, &chunk_cache, &dev_cache,
8247                                   &block_group_cache, &dev_extent_cache);
8248         if (ret < 0) {
8249                 if (ret == -EAGAIN)
8250                         goto loop;
8251                 goto out;
8252         }
8253
8254         ret = check_chunks(&chunk_cache, &block_group_cache,
8255                            &dev_extent_cache, NULL, NULL, NULL, 0);
8256         if (ret) {
8257                 if (ret == -EAGAIN)
8258                         goto loop;
8259                 err = ret;
8260         }
8261
8262         ret = check_extent_refs(root, &extent_cache);
8263         if (ret < 0) {
8264                 if (ret == -EAGAIN)
8265                         goto loop;
8266                 goto out;
8267         }
8268
8269         ret = check_devices(&dev_cache, &dev_extent_cache);
8270         if (ret && err)
8271                 ret = err;
8272
8273 out:
8274         task_stop(ctx.info);
8275         if (repair) {
8276                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8277                 extent_io_tree_cleanup(&excluded_extents);
8278                 root->fs_info->fsck_extent_cache = NULL;
8279                 root->fs_info->free_extent_hook = NULL;
8280                 root->fs_info->corrupt_blocks = NULL;
8281                 root->fs_info->excluded_extents = NULL;
8282         }
8283         free(bits);
8284         free_chunk_cache_tree(&chunk_cache);
8285         free_device_cache_tree(&dev_cache);
8286         free_block_group_tree(&block_group_cache);
8287         free_device_extent_tree(&dev_extent_cache);
8288         free_extent_cache_tree(&seen);
8289         free_extent_cache_tree(&pending);
8290         free_extent_cache_tree(&reada);
8291         free_extent_cache_tree(&nodes);
8292         return ret;
8293 loop:
8294         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8295         free_extent_cache_tree(&seen);
8296         free_extent_cache_tree(&pending);
8297         free_extent_cache_tree(&reada);
8298         free_extent_cache_tree(&nodes);
8299         free_chunk_cache_tree(&chunk_cache);
8300         free_block_group_tree(&block_group_cache);
8301         free_device_cache_tree(&dev_cache);
8302         free_device_extent_tree(&dev_extent_cache);
8303         free_extent_record_cache(root->fs_info, &extent_cache);
8304         free_root_item_list(&normal_trees);
8305         free_root_item_list(&dropping_trees);
8306         extent_io_tree_cleanup(&excluded_extents);
8307         goto again;
8308 }
8309
8310 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8311                            struct btrfs_root *root, int overwrite)
8312 {
8313         struct extent_buffer *c;
8314         struct extent_buffer *old = root->node;
8315         int level;
8316         int ret;
8317         struct btrfs_disk_key disk_key = {0,0,0};
8318
8319         level = 0;
8320
8321         if (overwrite) {
8322                 c = old;
8323                 extent_buffer_get(c);
8324                 goto init;
8325         }
8326         c = btrfs_alloc_free_block(trans, root,
8327                                    btrfs_level_size(root, 0),
8328                                    root->root_key.objectid,
8329                                    &disk_key, level, 0, 0);
8330         if (IS_ERR(c)) {
8331                 c = old;
8332                 extent_buffer_get(c);
8333                 overwrite = 1;
8334         }
8335 init:
8336         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8337         btrfs_set_header_level(c, level);
8338         btrfs_set_header_bytenr(c, c->start);
8339         btrfs_set_header_generation(c, trans->transid);
8340         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8341         btrfs_set_header_owner(c, root->root_key.objectid);
8342
8343         write_extent_buffer(c, root->fs_info->fsid,
8344                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8345
8346         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8347                             btrfs_header_chunk_tree_uuid(c),
8348                             BTRFS_UUID_SIZE);
8349
8350         btrfs_mark_buffer_dirty(c);
8351         /*
8352          * this case can happen in the following case:
8353          *
8354          * 1.overwrite previous root.
8355          *
8356          * 2.reinit reloc data root, this is because we skip pin
8357          * down reloc data tree before which means we can allocate
8358          * same block bytenr here.
8359          */
8360         if (old->start == c->start) {
8361                 btrfs_set_root_generation(&root->root_item,
8362                                           trans->transid);
8363                 root->root_item.level = btrfs_header_level(root->node);
8364                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8365                                         &root->root_key, &root->root_item);
8366                 if (ret) {
8367                         free_extent_buffer(c);
8368                         return ret;
8369                 }
8370         }
8371         free_extent_buffer(old);
8372         root->node = c;
8373         add_root_to_dirty_list(root);
8374         return 0;
8375 }
8376
8377 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8378                                 struct extent_buffer *eb, int tree_root)
8379 {
8380         struct extent_buffer *tmp;
8381         struct btrfs_root_item *ri;
8382         struct btrfs_key key;
8383         u64 bytenr;
8384         u32 leafsize;
8385         int level = btrfs_header_level(eb);
8386         int nritems;
8387         int ret;
8388         int i;
8389
8390         /*
8391          * If we have pinned this block before, don't pin it again.
8392          * This can not only avoid forever loop with broken filesystem
8393          * but also give us some speedups.
8394          */
8395         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8396                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8397                 return 0;
8398
8399         btrfs_pin_extent(fs_info, eb->start, eb->len);
8400
8401         leafsize = btrfs_super_leafsize(fs_info->super_copy);
8402         nritems = btrfs_header_nritems(eb);
8403         for (i = 0; i < nritems; i++) {
8404                 if (level == 0) {
8405                         btrfs_item_key_to_cpu(eb, &key, i);
8406                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8407                                 continue;
8408                         /* Skip the extent root and reloc roots */
8409                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8410                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8411                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8412                                 continue;
8413                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8414                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8415
8416                         /*
8417                          * If at any point we start needing the real root we
8418                          * will have to build a stump root for the root we are
8419                          * in, but for now this doesn't actually use the root so
8420                          * just pass in extent_root.
8421                          */
8422                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8423                                               leafsize, 0);
8424                         if (!extent_buffer_uptodate(tmp)) {
8425                                 fprintf(stderr, "Error reading root block\n");
8426                                 return -EIO;
8427                         }
8428                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8429                         free_extent_buffer(tmp);
8430                         if (ret)
8431                                 return ret;
8432                 } else {
8433                         bytenr = btrfs_node_blockptr(eb, i);
8434
8435                         /* If we aren't the tree root don't read the block */
8436                         if (level == 1 && !tree_root) {
8437                                 btrfs_pin_extent(fs_info, bytenr, leafsize);
8438                                 continue;
8439                         }
8440
8441                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8442                                               leafsize, 0);
8443                         if (!extent_buffer_uptodate(tmp)) {
8444                                 fprintf(stderr, "Error reading tree block\n");
8445                                 return -EIO;
8446                         }
8447                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8448                         free_extent_buffer(tmp);
8449                         if (ret)
8450                                 return ret;
8451                 }
8452         }
8453
8454         return 0;
8455 }
8456
8457 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8458 {
8459         int ret;
8460
8461         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8462         if (ret)
8463                 return ret;
8464
8465         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8466 }
8467
8468 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8469 {
8470         struct btrfs_block_group_cache *cache;
8471         struct btrfs_path *path;
8472         struct extent_buffer *leaf;
8473         struct btrfs_chunk *chunk;
8474         struct btrfs_key key;
8475         int ret;
8476         u64 start;
8477
8478         path = btrfs_alloc_path();
8479         if (!path)
8480                 return -ENOMEM;
8481
8482         key.objectid = 0;
8483         key.type = BTRFS_CHUNK_ITEM_KEY;
8484         key.offset = 0;
8485
8486         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8487         if (ret < 0) {
8488                 btrfs_free_path(path);
8489                 return ret;
8490         }
8491
8492         /*
8493          * We do this in case the block groups were screwed up and had alloc
8494          * bits that aren't actually set on the chunks.  This happens with
8495          * restored images every time and could happen in real life I guess.
8496          */
8497         fs_info->avail_data_alloc_bits = 0;
8498         fs_info->avail_metadata_alloc_bits = 0;
8499         fs_info->avail_system_alloc_bits = 0;
8500
8501         /* First we need to create the in-memory block groups */
8502         while (1) {
8503                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8504                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
8505                         if (ret < 0) {
8506                                 btrfs_free_path(path);
8507                                 return ret;
8508                         }
8509                         if (ret) {
8510                                 ret = 0;
8511                                 break;
8512                         }
8513                 }
8514                 leaf = path->nodes[0];
8515                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8516                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8517                         path->slots[0]++;
8518                         continue;
8519                 }
8520
8521                 chunk = btrfs_item_ptr(leaf, path->slots[0],
8522                                        struct btrfs_chunk);
8523                 btrfs_add_block_group(fs_info, 0,
8524                                       btrfs_chunk_type(leaf, chunk),
8525                                       key.objectid, key.offset,
8526                                       btrfs_chunk_length(leaf, chunk));
8527                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8528                                  key.offset + btrfs_chunk_length(leaf, chunk),
8529                                  GFP_NOFS);
8530                 path->slots[0]++;
8531         }
8532         start = 0;
8533         while (1) {
8534                 cache = btrfs_lookup_first_block_group(fs_info, start);
8535                 if (!cache)
8536                         break;
8537                 cache->cached = 1;
8538                 start = cache->key.objectid + cache->key.offset;
8539         }
8540
8541         btrfs_free_path(path);
8542         return 0;
8543 }
8544
8545 static int reset_balance(struct btrfs_trans_handle *trans,
8546                          struct btrfs_fs_info *fs_info)
8547 {
8548         struct btrfs_root *root = fs_info->tree_root;
8549         struct btrfs_path *path;
8550         struct extent_buffer *leaf;
8551         struct btrfs_key key;
8552         int del_slot, del_nr = 0;
8553         int ret;
8554         int found = 0;
8555
8556         path = btrfs_alloc_path();
8557         if (!path)
8558                 return -ENOMEM;
8559
8560         key.objectid = BTRFS_BALANCE_OBJECTID;
8561         key.type = BTRFS_BALANCE_ITEM_KEY;
8562         key.offset = 0;
8563
8564         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8565         if (ret) {
8566                 if (ret > 0)
8567                         ret = 0;
8568                 if (!ret)
8569                         goto reinit_data_reloc;
8570                 else
8571                         goto out;
8572         }
8573
8574         ret = btrfs_del_item(trans, root, path);
8575         if (ret)
8576                 goto out;
8577         btrfs_release_path(path);
8578
8579         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8580         key.type = BTRFS_ROOT_ITEM_KEY;
8581         key.offset = 0;
8582
8583         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8584         if (ret < 0)
8585                 goto out;
8586         while (1) {
8587                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8588                         if (!found)
8589                                 break;
8590
8591                         if (del_nr) {
8592                                 ret = btrfs_del_items(trans, root, path,
8593                                                       del_slot, del_nr);
8594                                 del_nr = 0;
8595                                 if (ret)
8596                                         goto out;
8597                         }
8598                         key.offset++;
8599                         btrfs_release_path(path);
8600
8601                         found = 0;
8602                         ret = btrfs_search_slot(trans, root, &key, path,
8603                                                 -1, 1);
8604                         if (ret < 0)
8605                                 goto out;
8606                         continue;
8607                 }
8608                 found = 1;
8609                 leaf = path->nodes[0];
8610                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8611                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8612                         break;
8613                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8614                         path->slots[0]++;
8615                         continue;
8616                 }
8617                 if (!del_nr) {
8618                         del_slot = path->slots[0];
8619                         del_nr = 1;
8620                 } else {
8621                         del_nr++;
8622                 }
8623                 path->slots[0]++;
8624         }
8625
8626         if (del_nr) {
8627                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8628                 if (ret)
8629                         goto out;
8630         }
8631         btrfs_release_path(path);
8632
8633 reinit_data_reloc:
8634         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8635         key.type = BTRFS_ROOT_ITEM_KEY;
8636         key.offset = (u64)-1;
8637         root = btrfs_read_fs_root(fs_info, &key);
8638         if (IS_ERR(root)) {
8639                 fprintf(stderr, "Error reading data reloc tree\n");
8640                 ret = PTR_ERR(root);
8641                 goto out;
8642         }
8643         record_root_in_trans(trans, root);
8644         ret = btrfs_fsck_reinit_root(trans, root, 0);
8645         if (ret)
8646                 goto out;
8647         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8648 out:
8649         btrfs_free_path(path);
8650         return ret;
8651 }
8652
8653 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8654                               struct btrfs_fs_info *fs_info)
8655 {
8656         u64 start = 0;
8657         int ret;
8658
8659         /*
8660          * The only reason we don't do this is because right now we're just
8661          * walking the trees we find and pinning down their bytes, we don't look
8662          * at any of the leaves.  In order to do mixed groups we'd have to check
8663          * the leaves of any fs roots and pin down the bytes for any file
8664          * extents we find.  Not hard but why do it if we don't have to?
8665          */
8666         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8667                 fprintf(stderr, "We don't support re-initing the extent tree "
8668                         "for mixed block groups yet, please notify a btrfs "
8669                         "developer you want to do this so they can add this "
8670                         "functionality.\n");
8671                 return -EINVAL;
8672         }
8673
8674         /*
8675          * first we need to walk all of the trees except the extent tree and pin
8676          * down the bytes that are in use so we don't overwrite any existing
8677          * metadata.
8678          */
8679         ret = pin_metadata_blocks(fs_info);
8680         if (ret) {
8681                 fprintf(stderr, "error pinning down used bytes\n");
8682                 return ret;
8683         }
8684
8685         /*
8686          * Need to drop all the block groups since we're going to recreate all
8687          * of them again.
8688          */
8689         btrfs_free_block_groups(fs_info);
8690         ret = reset_block_groups(fs_info);
8691         if (ret) {
8692                 fprintf(stderr, "error resetting the block groups\n");
8693                 return ret;
8694         }
8695
8696         /* Ok we can allocate now, reinit the extent root */
8697         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8698         if (ret) {
8699                 fprintf(stderr, "extent root initialization failed\n");
8700                 /*
8701                  * When the transaction code is updated we should end the
8702                  * transaction, but for now progs only knows about commit so
8703                  * just return an error.
8704                  */
8705                 return ret;
8706         }
8707
8708         /*
8709          * Now we have all the in-memory block groups setup so we can make
8710          * allocations properly, and the metadata we care about is safe since we
8711          * pinned all of it above.
8712          */
8713         while (1) {
8714                 struct btrfs_block_group_cache *cache;
8715
8716                 cache = btrfs_lookup_first_block_group(fs_info, start);
8717                 if (!cache)
8718                         break;
8719                 start = cache->key.objectid + cache->key.offset;
8720                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8721                                         &cache->key, &cache->item,
8722                                         sizeof(cache->item));
8723                 if (ret) {
8724                         fprintf(stderr, "Error adding block group\n");
8725                         return ret;
8726                 }
8727                 btrfs_extent_post_op(trans, fs_info->extent_root);
8728         }
8729
8730         ret = reset_balance(trans, fs_info);
8731         if (ret)
8732                 fprintf(stderr, "error reseting the pending balance\n");
8733
8734         return ret;
8735 }
8736
8737 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8738 {
8739         struct btrfs_path *path;
8740         struct btrfs_trans_handle *trans;
8741         struct btrfs_key key;
8742         int ret;
8743
8744         printf("Recowing metadata block %llu\n", eb->start);
8745         key.objectid = btrfs_header_owner(eb);
8746         key.type = BTRFS_ROOT_ITEM_KEY;
8747         key.offset = (u64)-1;
8748
8749         root = btrfs_read_fs_root(root->fs_info, &key);
8750         if (IS_ERR(root)) {
8751                 fprintf(stderr, "Couldn't find owner root %llu\n",
8752                         key.objectid);
8753                 return PTR_ERR(root);
8754         }
8755
8756         path = btrfs_alloc_path();
8757         if (!path)
8758                 return -ENOMEM;
8759
8760         trans = btrfs_start_transaction(root, 1);
8761         if (IS_ERR(trans)) {
8762                 btrfs_free_path(path);
8763                 return PTR_ERR(trans);
8764         }
8765
8766         path->lowest_level = btrfs_header_level(eb);
8767         if (path->lowest_level)
8768                 btrfs_node_key_to_cpu(eb, &key, 0);
8769         else
8770                 btrfs_item_key_to_cpu(eb, &key, 0);
8771
8772         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8773         btrfs_commit_transaction(trans, root);
8774         btrfs_free_path(path);
8775         return ret;
8776 }
8777
8778 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8779 {
8780         struct btrfs_path *path;
8781         struct btrfs_trans_handle *trans;
8782         struct btrfs_key key;
8783         int ret;
8784
8785         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8786                bad->key.type, bad->key.offset);
8787         key.objectid = bad->root_id;
8788         key.type = BTRFS_ROOT_ITEM_KEY;
8789         key.offset = (u64)-1;
8790
8791         root = btrfs_read_fs_root(root->fs_info, &key);
8792         if (IS_ERR(root)) {
8793                 fprintf(stderr, "Couldn't find owner root %llu\n",
8794                         key.objectid);
8795                 return PTR_ERR(root);
8796         }
8797
8798         path = btrfs_alloc_path();
8799         if (!path)
8800                 return -ENOMEM;
8801
8802         trans = btrfs_start_transaction(root, 1);
8803         if (IS_ERR(trans)) {
8804                 btrfs_free_path(path);
8805                 return PTR_ERR(trans);
8806         }
8807
8808         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8809         if (ret) {
8810                 if (ret > 0)
8811                         ret = 0;
8812                 goto out;
8813         }
8814         ret = btrfs_del_item(trans, root, path);
8815 out:
8816         btrfs_commit_transaction(trans, root);
8817         btrfs_free_path(path);
8818         return ret;
8819 }
8820
8821 static int zero_log_tree(struct btrfs_root *root)
8822 {
8823         struct btrfs_trans_handle *trans;
8824         int ret;
8825
8826         trans = btrfs_start_transaction(root, 1);
8827         if (IS_ERR(trans)) {
8828                 ret = PTR_ERR(trans);
8829                 return ret;
8830         }
8831         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8832         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8833         ret = btrfs_commit_transaction(trans, root);
8834         return ret;
8835 }
8836
8837 static int populate_csum(struct btrfs_trans_handle *trans,
8838                          struct btrfs_root *csum_root, char *buf, u64 start,
8839                          u64 len)
8840 {
8841         u64 offset = 0;
8842         u64 sectorsize;
8843         int ret = 0;
8844
8845         while (offset < len) {
8846                 sectorsize = csum_root->sectorsize;
8847                 ret = read_extent_data(csum_root, buf, start + offset,
8848                                        &sectorsize, 0);
8849                 if (ret)
8850                         break;
8851                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8852                                             start + offset, buf, sectorsize);
8853                 if (ret)
8854                         break;
8855                 offset += sectorsize;
8856         }
8857         return ret;
8858 }
8859
8860 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8861                                       struct btrfs_root *csum_root,
8862                                       struct btrfs_root *cur_root)
8863 {
8864         struct btrfs_path *path;
8865         struct btrfs_key key;
8866         struct extent_buffer *node;
8867         struct btrfs_file_extent_item *fi;
8868         char *buf = NULL;
8869         u64 start = 0;
8870         u64 len = 0;
8871         int slot = 0;
8872         int ret = 0;
8873
8874         path = btrfs_alloc_path();
8875         if (!path)
8876                 return -ENOMEM;
8877         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8878         if (!buf) {
8879                 ret = -ENOMEM;
8880                 goto out;
8881         }
8882
8883         key.objectid = 0;
8884         key.offset = 0;
8885         key.type = 0;
8886
8887         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8888         if (ret < 0)
8889                 goto out;
8890         /* Iterate all regular file extents and fill its csum */
8891         while (1) {
8892                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8893
8894                 if (key.type != BTRFS_EXTENT_DATA_KEY)
8895                         goto next;
8896                 node = path->nodes[0];
8897                 slot = path->slots[0];
8898                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8899                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8900                         goto next;
8901                 start = btrfs_file_extent_disk_bytenr(node, fi);
8902                 len = btrfs_file_extent_disk_num_bytes(node, fi);
8903
8904                 ret = populate_csum(trans, csum_root, buf, start, len);
8905                 if (ret == -EEXIST)
8906                         ret = 0;
8907                 if (ret < 0)
8908                         goto out;
8909 next:
8910                 /*
8911                  * TODO: if next leaf is corrupted, jump to nearest next valid
8912                  * leaf.
8913                  */
8914                 ret = btrfs_next_item(cur_root, path);
8915                 if (ret < 0)
8916                         goto out;
8917                 if (ret > 0) {
8918                         ret = 0;
8919                         goto out;
8920                 }
8921         }
8922
8923 out:
8924         btrfs_free_path(path);
8925         free(buf);
8926         return ret;
8927 }
8928
8929 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8930                                   struct btrfs_root *csum_root)
8931 {
8932         struct btrfs_fs_info *fs_info = csum_root->fs_info;
8933         struct btrfs_path *path;
8934         struct btrfs_root *tree_root = fs_info->tree_root;
8935         struct btrfs_root *cur_root;
8936         struct extent_buffer *node;
8937         struct btrfs_key key;
8938         int slot = 0;
8939         int ret = 0;
8940
8941         path = btrfs_alloc_path();
8942         if (!path)
8943                 return -ENOMEM;
8944
8945         key.objectid = BTRFS_FS_TREE_OBJECTID;
8946         key.offset = 0;
8947         key.type = BTRFS_ROOT_ITEM_KEY;
8948
8949         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
8950         if (ret < 0)
8951                 goto out;
8952         if (ret > 0) {
8953                 ret = -ENOENT;
8954                 goto out;
8955         }
8956
8957         while (1) {
8958                 node = path->nodes[0];
8959                 slot = path->slots[0];
8960                 btrfs_item_key_to_cpu(node, &key, slot);
8961                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8962                         goto out;
8963                 if (key.type != BTRFS_ROOT_ITEM_KEY)
8964                         goto next;
8965                 if (!is_fstree(key.objectid))
8966                         goto next;
8967                 key.offset = (u64)-1;
8968
8969                 cur_root = btrfs_read_fs_root(fs_info, &key);
8970                 if (IS_ERR(cur_root) || !cur_root) {
8971                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8972                                 key.objectid);
8973                         goto out;
8974                 }
8975                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8976                                 cur_root);
8977                 if (ret < 0)
8978                         goto out;
8979 next:
8980                 ret = btrfs_next_item(tree_root, path);
8981                 if (ret > 0) {
8982                         ret = 0;
8983                         goto out;
8984                 }
8985                 if (ret < 0)
8986                         goto out;
8987         }
8988
8989 out:
8990         btrfs_free_path(path);
8991         return ret;
8992 }
8993
8994 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
8995                                       struct btrfs_root *csum_root)
8996 {
8997         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
8998         struct btrfs_path *path;
8999         struct btrfs_extent_item *ei;
9000         struct extent_buffer *leaf;
9001         char *buf;
9002         struct btrfs_key key;
9003         int ret;
9004
9005         path = btrfs_alloc_path();
9006         if (!path)
9007                 return -ENOMEM;
9008
9009         key.objectid = 0;
9010         key.type = BTRFS_EXTENT_ITEM_KEY;
9011         key.offset = 0;
9012
9013         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9014         if (ret < 0) {
9015                 btrfs_free_path(path);
9016                 return ret;
9017         }
9018
9019         buf = malloc(csum_root->sectorsize);
9020         if (!buf) {
9021                 btrfs_free_path(path);
9022                 return -ENOMEM;
9023         }
9024
9025         while (1) {
9026                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9027                         ret = btrfs_next_leaf(extent_root, path);
9028                         if (ret < 0)
9029                                 break;
9030                         if (ret) {
9031                                 ret = 0;
9032                                 break;
9033                         }
9034                 }
9035                 leaf = path->nodes[0];
9036
9037                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9038                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9039                         path->slots[0]++;
9040                         continue;
9041                 }
9042
9043                 ei = btrfs_item_ptr(leaf, path->slots[0],
9044                                     struct btrfs_extent_item);
9045                 if (!(btrfs_extent_flags(leaf, ei) &
9046                       BTRFS_EXTENT_FLAG_DATA)) {
9047                         path->slots[0]++;
9048                         continue;
9049                 }
9050
9051                 ret = populate_csum(trans, csum_root, buf, key.objectid,
9052                                     key.offset);
9053                 if (ret)
9054                         break;
9055                 path->slots[0]++;
9056         }
9057
9058         btrfs_free_path(path);
9059         free(buf);
9060         return ret;
9061 }
9062
9063 /*
9064  * Recalculate the csum and put it into the csum tree.
9065  *
9066  * Extent tree init will wipe out all the extent info, so in that case, we
9067  * can't depend on extent tree, but use fs tree.  If search_fs_tree is set, we
9068  * will use fs/subvol trees to init the csum tree.
9069  */
9070 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9071                           struct btrfs_root *csum_root,
9072                           int search_fs_tree)
9073 {
9074         if (search_fs_tree)
9075                 return fill_csum_tree_from_fs(trans, csum_root);
9076         else
9077                 return fill_csum_tree_from_extent(trans, csum_root);
9078 }
9079
9080 struct root_item_info {
9081         /* level of the root */
9082         u8 level;
9083         /* number of nodes at this level, must be 1 for a root */
9084         int node_count;
9085         u64 bytenr;
9086         u64 gen;
9087         struct cache_extent cache_extent;
9088 };
9089
9090 static struct cache_tree *roots_info_cache = NULL;
9091
9092 static void free_roots_info_cache(void)
9093 {
9094         if (!roots_info_cache)
9095                 return;
9096
9097         while (!cache_tree_empty(roots_info_cache)) {
9098                 struct cache_extent *entry;
9099                 struct root_item_info *rii;
9100
9101                 entry = first_cache_extent(roots_info_cache);
9102                 if (!entry)
9103                         break;
9104                 remove_cache_extent(roots_info_cache, entry);
9105                 rii = container_of(entry, struct root_item_info, cache_extent);
9106                 free(rii);
9107         }
9108
9109         free(roots_info_cache);
9110         roots_info_cache = NULL;
9111 }
9112
9113 static int build_roots_info_cache(struct btrfs_fs_info *info)
9114 {
9115         int ret = 0;
9116         struct btrfs_key key;
9117         struct extent_buffer *leaf;
9118         struct btrfs_path *path;
9119
9120         if (!roots_info_cache) {
9121                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9122                 if (!roots_info_cache)
9123                         return -ENOMEM;
9124                 cache_tree_init(roots_info_cache);
9125         }
9126
9127         path = btrfs_alloc_path();
9128         if (!path)
9129                 return -ENOMEM;
9130
9131         key.objectid = 0;
9132         key.type = BTRFS_EXTENT_ITEM_KEY;
9133         key.offset = 0;
9134
9135         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9136         if (ret < 0)
9137                 goto out;
9138         leaf = path->nodes[0];
9139
9140         while (1) {
9141                 struct btrfs_key found_key;
9142                 struct btrfs_extent_item *ei;
9143                 struct btrfs_extent_inline_ref *iref;
9144                 int slot = path->slots[0];
9145                 int type;
9146                 u64 flags;
9147                 u64 root_id;
9148                 u8 level;
9149                 struct cache_extent *entry;
9150                 struct root_item_info *rii;
9151
9152                 if (slot >= btrfs_header_nritems(leaf)) {
9153                         ret = btrfs_next_leaf(info->extent_root, path);
9154                         if (ret < 0) {
9155                                 break;
9156                         } else if (ret) {
9157                                 ret = 0;
9158                                 break;
9159                         }
9160                         leaf = path->nodes[0];
9161                         slot = path->slots[0];
9162                 }
9163
9164                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9165
9166                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9167                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9168                         goto next;
9169
9170                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9171                 flags = btrfs_extent_flags(leaf, ei);
9172
9173                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9174                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9175                         goto next;
9176
9177                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9178                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9179                         level = found_key.offset;
9180                 } else {
9181                         struct btrfs_tree_block_info *info;
9182
9183                         info = (struct btrfs_tree_block_info *)(ei + 1);
9184                         iref = (struct btrfs_extent_inline_ref *)(info + 1);
9185                         level = btrfs_tree_block_level(leaf, info);
9186                 }
9187
9188                 /*
9189                  * For a root extent, it must be of the following type and the
9190                  * first (and only one) iref in the item.
9191                  */
9192                 type = btrfs_extent_inline_ref_type(leaf, iref);
9193                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9194                         goto next;
9195
9196                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9197                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9198                 if (!entry) {
9199                         rii = malloc(sizeof(struct root_item_info));
9200                         if (!rii) {
9201                                 ret = -ENOMEM;
9202                                 goto out;
9203                         }
9204                         rii->cache_extent.start = root_id;
9205                         rii->cache_extent.size = 1;
9206                         rii->level = (u8)-1;
9207                         entry = &rii->cache_extent;
9208                         ret = insert_cache_extent(roots_info_cache, entry);
9209                         ASSERT(ret == 0);
9210                 } else {
9211                         rii = container_of(entry, struct root_item_info,
9212                                            cache_extent);
9213                 }
9214
9215                 ASSERT(rii->cache_extent.start == root_id);
9216                 ASSERT(rii->cache_extent.size == 1);
9217
9218                 if (level > rii->level || rii->level == (u8)-1) {
9219                         rii->level = level;
9220                         rii->bytenr = found_key.objectid;
9221                         rii->gen = btrfs_extent_generation(leaf, ei);
9222                         rii->node_count = 1;
9223                 } else if (level == rii->level) {
9224                         rii->node_count++;
9225                 }
9226 next:
9227                 path->slots[0]++;
9228         }
9229
9230 out:
9231         btrfs_free_path(path);
9232
9233         return ret;
9234 }
9235
9236 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9237                                   struct btrfs_path *path,
9238                                   const struct btrfs_key *root_key,
9239                                   const int read_only_mode)
9240 {
9241         const u64 root_id = root_key->objectid;
9242         struct cache_extent *entry;
9243         struct root_item_info *rii;
9244         struct btrfs_root_item ri;
9245         unsigned long offset;
9246
9247         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9248         if (!entry) {
9249                 fprintf(stderr,
9250                         "Error: could not find extent items for root %llu\n",
9251                         root_key->objectid);
9252                 return -ENOENT;
9253         }
9254
9255         rii = container_of(entry, struct root_item_info, cache_extent);
9256         ASSERT(rii->cache_extent.start == root_id);
9257         ASSERT(rii->cache_extent.size == 1);
9258
9259         if (rii->node_count != 1) {
9260                 fprintf(stderr,
9261                         "Error: could not find btree root extent for root %llu\n",
9262                         root_id);
9263                 return -ENOENT;
9264         }
9265
9266         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9267         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9268
9269         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9270             btrfs_root_level(&ri) != rii->level ||
9271             btrfs_root_generation(&ri) != rii->gen) {
9272
9273                 /*
9274                  * If we're in repair mode but our caller told us to not update
9275                  * the root item, i.e. just check if it needs to be updated, don't
9276                  * print this message, since the caller will call us again shortly
9277                  * for the same root item without read only mode (the caller will
9278                  * open a transaction first).
9279                  */
9280                 if (!(read_only_mode && repair))
9281                         fprintf(stderr,
9282                                 "%sroot item for root %llu,"
9283                                 " current bytenr %llu, current gen %llu, current level %u,"
9284                                 " new bytenr %llu, new gen %llu, new level %u\n",
9285                                 (read_only_mode ? "" : "fixing "),
9286                                 root_id,
9287                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9288                                 btrfs_root_level(&ri),
9289                                 rii->bytenr, rii->gen, rii->level);
9290
9291                 if (btrfs_root_generation(&ri) > rii->gen) {
9292                         fprintf(stderr,
9293                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9294                                 root_id, btrfs_root_generation(&ri), rii->gen);
9295                         return -EINVAL;
9296                 }
9297
9298                 if (!read_only_mode) {
9299                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9300                         btrfs_set_root_level(&ri, rii->level);
9301                         btrfs_set_root_generation(&ri, rii->gen);
9302                         write_extent_buffer(path->nodes[0], &ri,
9303                                             offset, sizeof(ri));
9304                 }
9305
9306                 return 1;
9307         }
9308
9309         return 0;
9310 }
9311
9312 /*
9313  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9314  * caused read-only snapshots to be corrupted if they were created at a moment
9315  * when the source subvolume/snapshot had orphan items. The issue was that the
9316  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9317  * node instead of the post orphan cleanup root node.
9318  * So this function, and its callees, just detects and fixes those cases. Even
9319  * though the regression was for read-only snapshots, this function applies to
9320  * any snapshot/subvolume root.
9321  * This must be run before any other repair code - not doing it so, makes other
9322  * repair code delete or modify backrefs in the extent tree for example, which
9323  * will result in an inconsistent fs after repairing the root items.
9324  */
9325 static int repair_root_items(struct btrfs_fs_info *info)
9326 {
9327         struct btrfs_path *path = NULL;
9328         struct btrfs_key key;
9329         struct extent_buffer *leaf;
9330         struct btrfs_trans_handle *trans = NULL;
9331         int ret = 0;
9332         int bad_roots = 0;
9333         int need_trans = 0;
9334
9335         ret = build_roots_info_cache(info);
9336         if (ret)
9337                 goto out;
9338
9339         path = btrfs_alloc_path();
9340         if (!path) {
9341                 ret = -ENOMEM;
9342                 goto out;
9343         }
9344
9345         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9346         key.type = BTRFS_ROOT_ITEM_KEY;
9347         key.offset = 0;
9348
9349 again:
9350         /*
9351          * Avoid opening and committing transactions if a leaf doesn't have
9352          * any root items that need to be fixed, so that we avoid rotating
9353          * backup roots unnecessarily.
9354          */
9355         if (need_trans) {
9356                 trans = btrfs_start_transaction(info->tree_root, 1);
9357                 if (IS_ERR(trans)) {
9358                         ret = PTR_ERR(trans);
9359                         goto out;
9360                 }
9361         }
9362
9363         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9364                                 0, trans ? 1 : 0);
9365         if (ret < 0)
9366                 goto out;
9367         leaf = path->nodes[0];
9368
9369         while (1) {
9370                 struct btrfs_key found_key;
9371
9372                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9373                         int no_more_keys = find_next_key(path, &key);
9374
9375                         btrfs_release_path(path);
9376                         if (trans) {
9377                                 ret = btrfs_commit_transaction(trans,
9378                                                                info->tree_root);
9379                                 trans = NULL;
9380                                 if (ret < 0)
9381                                         goto out;
9382                         }
9383                         need_trans = 0;
9384                         if (no_more_keys)
9385                                 break;
9386                         goto again;
9387                 }
9388
9389                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9390
9391                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9392                         goto next;
9393                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9394                         goto next;
9395
9396                 ret = maybe_repair_root_item(info, path, &found_key,
9397                                              trans ? 0 : 1);
9398                 if (ret < 0)
9399                         goto out;
9400                 if (ret) {
9401                         if (!trans && repair) {
9402                                 need_trans = 1;
9403                                 key = found_key;
9404                                 btrfs_release_path(path);
9405                                 goto again;
9406                         }
9407                         bad_roots++;
9408                 }
9409 next:
9410                 path->slots[0]++;
9411         }
9412         ret = 0;
9413 out:
9414         free_roots_info_cache();
9415         btrfs_free_path(path);
9416         if (trans)
9417                 btrfs_commit_transaction(trans, info->tree_root);
9418         if (ret < 0)
9419                 return ret;
9420
9421         return bad_roots;
9422 }
9423
9424 const char * const cmd_check_usage[] = {
9425         "btrfs check [options] <device>",
9426         "Check structural inegrity of a filesystem (unmounted).",
9427         "Check structural inegrity of an unmounted filesystem. Verify internal",
9428         "trees' consistency and item connectivity. In the repair mode try to",
9429         "fix the problems found.",
9430         "WARNING: the repair mode is considered dangerous",
9431         "",
9432         "-s|--super <superblock>     use this superblock copy",
9433         "-b|--backup                 use the backup root copy",
9434         "--repair                    try to repair the filesystem",
9435         "--readonly                  run in read-only mode (default)",
9436         "--init-csum-tree            create a new CRC tree",
9437         "--init-extent-tree          create a new extent tree",
9438         "--check-data-csum           verify checkums of data blocks",
9439         "-Q|--qgroup-report           print a report on qgroup consistency",
9440         "-E|--subvol-extents <subvolid>",
9441         "                            print subvolume extents and sharing state",
9442         "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
9443         "-p|--progress               indicate progress",
9444         NULL
9445 };
9446
9447 int cmd_check(int argc, char **argv)
9448 {
9449         struct cache_tree root_cache;
9450         struct btrfs_root *root;
9451         struct btrfs_fs_info *info;
9452         u64 bytenr = 0;
9453         u64 subvolid = 0;
9454         u64 tree_root_bytenr = 0;
9455         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9456         int ret;
9457         u64 num;
9458         int init_csum_tree = 0;
9459         int readonly = 0;
9460         int qgroup_report = 0;
9461         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9462
9463         while(1) {
9464                 int c;
9465                 enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT,
9466                         OPT_CHECK_CSUM, OPT_READONLY };
9467                 static const struct option long_options[] = {
9468                         { "super", required_argument, NULL, 's' },
9469                         { "repair", no_argument, NULL, OPT_REPAIR },
9470                         { "readonly", no_argument, NULL, OPT_READONLY },
9471                         { "init-csum-tree", no_argument, NULL, OPT_INIT_CSUM },
9472                         { "init-extent-tree", no_argument, NULL, OPT_INIT_EXTENT },
9473                         { "check-data-csum", no_argument, NULL, OPT_CHECK_CSUM },
9474                         { "backup", no_argument, NULL, 'b' },
9475                         { "subvol-extents", required_argument, NULL, 'E' },
9476                         { "qgroup-report", no_argument, NULL, 'Q' },
9477                         { "tree-root", required_argument, NULL, 'r' },
9478                         { "progress", no_argument, NULL, 'p' },
9479                         { NULL, 0, NULL, 0}
9480                 };
9481
9482                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9483                 if (c < 0)
9484                         break;
9485                 switch(c) {
9486                         case 'a': /* ignored */ break;
9487                         case 'b':
9488                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9489                                 break;
9490                         case 's':
9491                                 num = arg_strtou64(optarg);
9492                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9493                                         fprintf(stderr,
9494                                                 "ERROR: super mirror should be less than: %d\n",
9495                                                 BTRFS_SUPER_MIRROR_MAX);
9496                                         exit(1);
9497                                 }
9498                                 bytenr = btrfs_sb_offset(((int)num));
9499                                 printf("using SB copy %llu, bytenr %llu\n", num,
9500                                        (unsigned long long)bytenr);
9501                                 break;
9502                         case 'Q':
9503                                 qgroup_report = 1;
9504                                 break;
9505                         case 'E':
9506                                 subvolid = arg_strtou64(optarg);
9507                                 break;
9508                         case 'r':
9509                                 tree_root_bytenr = arg_strtou64(optarg);
9510                                 break;
9511                         case 'p':
9512                                 ctx.progress_enabled = true;
9513                                 break;
9514                         case '?':
9515                         case 'h':
9516                                 usage(cmd_check_usage);
9517                         case OPT_REPAIR:
9518                                 printf("enabling repair mode\n");
9519                                 repair = 1;
9520                                 ctree_flags |= OPEN_CTREE_WRITES;
9521                                 break;
9522                         case OPT_READONLY:
9523                                 readonly = 1;
9524                                 break;
9525                         case OPT_INIT_CSUM:
9526                                 printf("Creating a new CRC tree\n");
9527                                 init_csum_tree = 1;
9528                                 repair = 1;
9529                                 ctree_flags |= OPEN_CTREE_WRITES;
9530                                 break;
9531                         case OPT_INIT_EXTENT:
9532                                 init_extent_tree = 1;
9533                                 ctree_flags |= (OPEN_CTREE_WRITES |
9534                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9535                                 repair = 1;
9536                                 break;
9537                         case OPT_CHECK_CSUM:
9538                                 check_data_csum = 1;
9539                                 break;
9540                 }
9541         }
9542         argc = argc - optind;
9543
9544         if (check_argc_exact(argc, 1))
9545                 usage(cmd_check_usage);
9546
9547         if (ctx.progress_enabled) {
9548                 ctx.tp = TASK_NOTHING;
9549                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9550         }
9551
9552         /* This check is the only reason for --readonly to exist */
9553         if (readonly && repair) {
9554                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9555                 exit(1);
9556         }
9557
9558         radix_tree_init();
9559         cache_tree_init(&root_cache);
9560
9561         if((ret = check_mounted(argv[optind])) < 0) {
9562                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9563                 goto err_out;
9564         } else if(ret) {
9565                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9566                 ret = -EBUSY;
9567                 goto err_out;
9568         }
9569
9570         /* only allow partial opening under repair mode */
9571         if (repair)
9572                 ctree_flags |= OPEN_CTREE_PARTIAL;
9573
9574         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9575                                   ctree_flags);
9576         if (!info) {
9577                 fprintf(stderr, "Couldn't open file system\n");
9578                 ret = -EIO;
9579                 goto err_out;
9580         }
9581
9582         global_info = info;
9583         root = info->fs_root;
9584
9585         /*
9586          * repair mode will force us to commit transaction which
9587          * will make us fail to load log tree when mounting.
9588          */
9589         if (repair && btrfs_super_log_root(info->super_copy)) {
9590                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9591                 if (!ret) {
9592                         ret = 1;
9593                         goto close_out;
9594                 }
9595                 ret = zero_log_tree(root);
9596                 if (ret) {
9597                         fprintf(stderr, "fail to zero log tree\n");
9598                         goto close_out;
9599                 }
9600         }
9601
9602         uuid_unparse(info->super_copy->fsid, uuidbuf);
9603         if (qgroup_report) {
9604                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9605                        uuidbuf);
9606                 ret = qgroup_verify_all(info);
9607                 if (ret == 0)
9608                         print_qgroup_report(1);
9609                 goto close_out;
9610         }
9611         if (subvolid) {
9612                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9613                        subvolid, argv[optind], uuidbuf);
9614                 ret = print_extent_state(info, subvolid);
9615                 goto close_out;
9616         }
9617         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9618
9619         if (!extent_buffer_uptodate(info->tree_root->node) ||
9620             !extent_buffer_uptodate(info->dev_root->node) ||
9621             !extent_buffer_uptodate(info->chunk_root->node)) {
9622                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9623                 ret = -EIO;
9624                 goto close_out;
9625         }
9626
9627         if (init_extent_tree || init_csum_tree) {
9628                 struct btrfs_trans_handle *trans;
9629
9630                 trans = btrfs_start_transaction(info->extent_root, 0);
9631                 if (IS_ERR(trans)) {
9632                         fprintf(stderr, "Error starting transaction\n");
9633                         ret = PTR_ERR(trans);
9634                         goto close_out;
9635                 }
9636
9637                 if (init_extent_tree) {
9638                         printf("Creating a new extent tree\n");
9639                         ret = reinit_extent_tree(trans, info);
9640                         if (ret)
9641                                 goto close_out;
9642                 }
9643
9644                 if (init_csum_tree) {
9645                         fprintf(stderr, "Reinit crc root\n");
9646                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9647                         if (ret) {
9648                                 fprintf(stderr, "crc root initialization failed\n");
9649                                 ret = -EIO;
9650                                 goto close_out;
9651                         }
9652
9653                         ret = fill_csum_tree(trans, info->csum_root,
9654                                              init_extent_tree);
9655                         if (ret) {
9656                                 fprintf(stderr, "crc refilling failed\n");
9657                                 return -EIO;
9658                         }
9659                 }
9660                 /*
9661                  * Ok now we commit and run the normal fsck, which will add
9662                  * extent entries for all of the items it finds.
9663                  */
9664                 ret = btrfs_commit_transaction(trans, info->extent_root);
9665                 if (ret)
9666                         goto close_out;
9667         }
9668         if (!extent_buffer_uptodate(info->extent_root->node)) {
9669                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9670                 ret = -EIO;
9671                 goto close_out;
9672         }
9673         if (!extent_buffer_uptodate(info->csum_root->node)) {
9674                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9675                 ret = -EIO;
9676                 goto close_out;
9677         }
9678
9679         if (!ctx.progress_enabled)
9680                 fprintf(stderr, "checking extents\n");
9681         ret = check_chunks_and_extents(root);
9682         if (ret)
9683                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9684
9685         ret = repair_root_items(info);
9686         if (ret < 0)
9687                 goto close_out;
9688         if (repair) {
9689                 fprintf(stderr, "Fixed %d roots.\n", ret);
9690                 ret = 0;
9691         } else if (ret > 0) {
9692                 fprintf(stderr,
9693                        "Found %d roots with an outdated root item.\n",
9694                        ret);
9695                 fprintf(stderr,
9696                         "Please run a filesystem check with the option --repair to fix them.\n");
9697                 ret = 1;
9698                 goto close_out;
9699         }
9700
9701         if (!ctx.progress_enabled)
9702                 fprintf(stderr, "checking free space cache\n");
9703         ret = check_space_cache(root);
9704         if (ret)
9705                 goto out;
9706
9707         /*
9708          * We used to have to have these hole extents in between our real
9709          * extents so if we don't have this flag set we need to make sure there
9710          * are no gaps in the file extents for inodes, otherwise we can just
9711          * ignore it when this happens.
9712          */
9713         no_holes = btrfs_fs_incompat(root->fs_info,
9714                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9715         if (!ctx.progress_enabled)
9716                 fprintf(stderr, "checking fs roots\n");
9717         ret = check_fs_roots(root, &root_cache);
9718         if (ret)
9719                 goto out;
9720
9721         fprintf(stderr, "checking csums\n");
9722         ret = check_csums(root);
9723         if (ret)
9724                 goto out;
9725
9726         fprintf(stderr, "checking root refs\n");
9727         ret = check_root_refs(root, &root_cache);
9728         if (ret)
9729                 goto out;
9730
9731         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9732                 struct extent_buffer *eb;
9733
9734                 eb = list_first_entry(&root->fs_info->recow_ebs,
9735                                       struct extent_buffer, recow);
9736                 list_del_init(&eb->recow);
9737                 ret = recow_extent_buffer(root, eb);
9738                 if (ret)
9739                         break;
9740         }
9741
9742         while (!list_empty(&delete_items)) {
9743                 struct bad_item *bad;
9744
9745                 bad = list_first_entry(&delete_items, struct bad_item, list);
9746                 list_del_init(&bad->list);
9747                 if (repair)
9748                         ret = delete_bad_item(root, bad);
9749                 free(bad);
9750         }
9751
9752         if (info->quota_enabled) {
9753                 int err;
9754                 fprintf(stderr, "checking quota groups\n");
9755                 err = qgroup_verify_all(info);
9756                 if (err)
9757                         goto out;
9758         }
9759
9760         if (!list_empty(&root->fs_info->recow_ebs)) {
9761                 fprintf(stderr, "Transid errors in file system\n");
9762                 ret = 1;
9763         }
9764 out:
9765         print_qgroup_report(0);
9766         if (found_old_backref) { /*
9767                  * there was a disk format change when mixed
9768                  * backref was in testing tree. The old format
9769                  * existed about one week.
9770                  */
9771                 printf("\n * Found old mixed backref format. "
9772                        "The old format is not supported! *"
9773                        "\n * Please mount the FS in readonly mode, "
9774                        "backup data and re-format the FS. *\n\n");
9775                 ret = 1;
9776         }
9777         printf("found %llu bytes used err is %d\n",
9778                (unsigned long long)bytes_used, ret);
9779         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9780         printf("total tree bytes: %llu\n",
9781                (unsigned long long)total_btree_bytes);
9782         printf("total fs tree bytes: %llu\n",
9783                (unsigned long long)total_fs_tree_bytes);
9784         printf("total extent tree bytes: %llu\n",
9785                (unsigned long long)total_extent_tree_bytes);
9786         printf("btree space waste bytes: %llu\n",
9787                (unsigned long long)btree_space_waste);
9788         printf("file data blocks allocated: %llu\n referenced %llu\n",
9789                 (unsigned long long)data_bytes_allocated,
9790                 (unsigned long long)data_bytes_referenced);
9791
9792         free_root_recs_tree(&root_cache);
9793 close_out:
9794         close_ctree(root);
9795 err_out:
9796         if (ctx.progress_enabled)
9797                 task_deinit(ctx.info);
9798
9799         return ret;
9800 }