btrfs-progs: handle errors in add_shared_node and fail in the caller
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "btrfsck.h"
39 #include "qgroup-verify.h"
40 #include "rbtree-utils.h"
41 #include "backref.h"
42 #include "ulist.h"
43
/*
 * Phases reported by the progress indicator.  The values are used as
 * indexes into task_position_string[] in print_status_check(), which has
 * no entry for TASK_NOTHING.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
50
/* State handed to print_status_check() through its opaque argument. */
struct task_ctx {
        int progress_enabled;   /* presumably set when progress output is wanted -- confirm at the users */
        enum task_position tp;  /* phase whose label print_status_check() prints */

        struct task_info *info; /* handle given to task_period_start()/task_period_wait() */
};
57
/*
 * File-scope state of the checker.  Almost all of it is read and updated
 * outside this part of the file; within it only no_holes is consulted
 * (by maybe_free_inode_rec(), to skip the file-extent-gap check).
 * NOTE(review): the meaning of the individual counters is inferred from
 * their names only -- confirm against the code that updates them.
 */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static int found_old_backref = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
static int repair = 0;
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
static struct task_ctx ctx = { 0 };
75
76 static void *print_status_check(void *p)
77 {
78         struct task_ctx *priv = p;
79         const char work_indicator[] = { '.', 'o', 'O', 'o' };
80         uint32_t count = 0;
81         static char *task_position_string[] = {
82                 "checking extents",
83                 "checking free space cache",
84                 "checking fs roots",
85         };
86
87         task_period_start(priv->info, 1000 /* 1s */);
88
89         if (priv->tp == TASK_NOTHING)
90                 return NULL;
91
92         while (1) {
93                 printf("%s [%c]\r", task_position_string[priv->tp],
94                                 work_indicator[count % 4]);
95                 count++;
96                 fflush(stdout);
97                 task_period_wait(priv->info);
98         }
99         return NULL;
100 }
101
/*
 * Finish the progress line: emit a newline and flush stdout.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
        (void)p;

        printf("\n");
        fflush(stdout);
        return 0;
}
109
/*
 * Common header of a back reference; embedded as the first member ("node")
 * of both struct data_backref and struct tree_backref.
 * NOTE(review): the flags are not touched in this part of the file; their
 * meaning below is taken from the names only.
 */
struct extent_backref {
        struct list_head list;
        unsigned int is_data:1;           /* presumably selects data_backref vs tree_backref */
        unsigned int found_extent_tree:1;
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
118
/*
 * Back reference for a data extent.  "parent" and "root" share storage
 * through the anonymous union, so only one of them is meaningful for a
 * given record (presumably chosen by node.full_backref -- confirm).
 */
struct data_backref {
        struct extent_backref node;
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        u32 found_ref;
};
133
/*
 * Much like data_backref, but with the undetermined members removed and
 * linked through a plain list_head.
 * During the extent scan it is stored in root->orphan_data_extent.
 * During the fs tree scan it is then moved to the owning inode record's
 * orphan_extents list (struct inode_record).
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        u64 objectid;   /* printed as the inode number by print_orphan_data_extents() */
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
148
/*
 * Back reference for a tree block.  As in struct data_backref, "parent"
 * and "root" occupy the same storage.
 */
struct tree_backref {
        struct extent_backref node;
        union {
                u64 parent;
                u64 root;
        };
};
156
/*
 * Per-extent bookkeeping record.
 * NOTE(review): nothing in this part of the file reads or writes this
 * structure, so no field semantics are documented here; confirm them
 * against the extent-checking code before relying on any of them.
 */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        int flag_block_full_backref;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
183
/*
 * One name through which an inode is referenced; linked on
 * inode_record.backrefs.  maybe_free_inode_rec() drops an entry once its
 * dir item, dir index and inode ref have all been found without errors
 * and the inode's link count matches.
 */
struct inode_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_inode_ref:1;
        unsigned int filetype:8;        /* compared with imode_to_type(rec->imode) */
        int errors;                     /* REF_ERR_* bits */
        unsigned int ref_type;
        u64 dir;
        u64 index;
        u16 namelen;
        char name[0];                   /* name bytes follow; clone_inode_rec() copies namelen + 1 of them */
};
197
/*
 * Summary of a root item, kept on a list.
 * NOTE(review): not used in this part of the file; field meanings are
 * not documented here and should be confirmed at the users.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        int level_size;
        struct btrfs_key drop_key;
};
208
/*
 * Error bits stored in the "errors" field of a back reference
 * (inode_backref here; presumably root_backref as well -- confirm).
 * print_ref_error() turns them into text.
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
222
/*
 * A hole covering [start, start + len) in a file, stored in an rb-tree
 * (inode_record.holes) ordered by start; see compare_hole().
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
228
229 /* Compatible function to allow reuse of old codes */
230 static u64 first_extent_gap(struct rb_root *holes)
231 {
232         struct file_extent_hole *hole;
233
234         if (RB_EMPTY_ROOT(holes))
235                 return (u64)-1;
236
237         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
238         return hole->start;
239 }
240
241 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
242 {
243         struct file_extent_hole *hole1;
244         struct file_extent_hole *hole2;
245
246         hole1 = rb_entry(node1, struct file_extent_hole, node);
247         hole2 = rb_entry(node2, struct file_extent_hole, node);
248
249         if (hole1->start > hole2->start)
250                 return -1;
251         if (hole1->start < hole2->start)
252                 return 1;
253         /* Now hole1->start == hole2->start */
254         if (hole1->len >= hole2->len)
255                 /*
256                  * Hole 1 will be merge center
257                  * Same hole will be merged later
258                  */
259                 return -1;
260         /* Hole 2 will be merge center */
261         return 1;
262 }
263
264 /*
265  * Add a hole to the record
266  *
267  * This will do hole merge for copy_file_extent_holes(),
268  * which will ensure there won't be continuous holes.
269  */
270 static int add_file_extent_hole(struct rb_root *holes,
271                                 u64 start, u64 len)
272 {
273         struct file_extent_hole *hole;
274         struct file_extent_hole *prev = NULL;
275         struct file_extent_hole *next = NULL;
276
277         hole = malloc(sizeof(*hole));
278         if (!hole)
279                 return -ENOMEM;
280         hole->start = start;
281         hole->len = len;
282         /* Since compare will not return 0, no -EEXIST will happen */
283         rb_insert(holes, &hole->node, compare_hole);
284
285         /* simple merge with previous hole */
286         if (rb_prev(&hole->node))
287                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
288                                 node);
289         if (prev && prev->start + prev->len >= hole->start) {
290                 hole->len = hole->start + hole->len - prev->start;
291                 hole->start = prev->start;
292                 rb_erase(&prev->node, holes);
293                 free(prev);
294                 prev = NULL;
295         }
296
297         /* iterate merge with next holes */
298         while (1) {
299                 if (!rb_next(&hole->node))
300                         break;
301                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
302                                         node);
303                 if (hole->start + hole->len >= next->start) {
304                         if (hole->start + hole->len <= next->start + next->len)
305                                 hole->len = next->start + next->len -
306                                             hole->start;
307                         rb_erase(&next->node, holes);
308                         free(next);
309                         next = NULL;
310                 } else
311                         break;
312         }
313         return 0;
314 }
315
316 static int compare_hole_range(struct rb_node *node, void *data)
317 {
318         struct file_extent_hole *hole;
319         u64 start;
320
321         hole = (struct file_extent_hole *)data;
322         start = hole->start;
323
324         hole = rb_entry(node, struct file_extent_hole, node);
325         if (start < hole->start)
326                 return -1;
327         if (start >= hole->start && start < hole->start + hole->len)
328                 return 0;
329         return 1;
330 }
331
332 /*
333  * Delete a hole in the record
334  *
335  * This will do the hole split and is much restrict than add.
336  */
337 static int del_file_extent_hole(struct rb_root *holes,
338                                 u64 start, u64 len)
339 {
340         struct file_extent_hole *hole;
341         struct file_extent_hole tmp;
342         u64 prev_start = 0;
343         u64 prev_len = 0;
344         u64 next_start = 0;
345         u64 next_len = 0;
346         struct rb_node *node;
347         int have_prev = 0;
348         int have_next = 0;
349         int ret = 0;
350
351         tmp.start = start;
352         tmp.len = len;
353         node = rb_search(holes, &tmp, compare_hole_range, NULL);
354         if (!node)
355                 return -EEXIST;
356         hole = rb_entry(node, struct file_extent_hole, node);
357         if (start + len > hole->start + hole->len)
358                 return -EEXIST;
359
360         /*
361          * Now there will be no overflap, delete the hole and re-add the
362          * split(s) if they exists.
363          */
364         if (start > hole->start) {
365                 prev_start = hole->start;
366                 prev_len = start - hole->start;
367                 have_prev = 1;
368         }
369         if (hole->start + hole->len > start + len) {
370                 next_start = start + len;
371                 next_len = hole->start + hole->len - start - len;
372                 have_next = 1;
373         }
374         rb_erase(node, holes);
375         free(hole);
376         if (have_prev) {
377                 ret = add_file_extent_hole(holes, prev_start, prev_len);
378                 if (ret < 0)
379                         return ret;
380         }
381         if (have_next) {
382                 ret = add_file_extent_hole(holes, next_start, next_len);
383                 if (ret < 0)
384                         return ret;
385         }
386         return 0;
387 }
388
389 static int copy_file_extent_holes(struct rb_root *dst,
390                                   struct rb_root *src)
391 {
392         struct file_extent_hole *hole;
393         struct rb_node *node;
394         int ret = 0;
395
396         node = rb_first(src);
397         while (node) {
398                 hole = rb_entry(node, struct file_extent_hole, node);
399                 ret = add_file_extent_hole(dst, hole->start, hole->len);
400                 if (ret)
401                         break;
402                 node = rb_next(node);
403         }
404         return ret;
405 }
406
407 static void free_file_extent_holes(struct rb_root *holes)
408 {
409         struct rb_node *node;
410         struct file_extent_hole *hole;
411
412         node = rb_first(holes);
413         while (node) {
414                 hole = rb_entry(node, struct file_extent_hole, node);
415                 rb_erase(node, holes);
416                 free(hole);
417                 node = rb_first(holes);
418         }
419 }
420
/*
 * Everything collected about one inode while walking an fs tree.  Records
 * are reference counted: get_inode_rec() clones a record whose refs > 1
 * before handing it out for modification, and free_inode_rec() releases
 * the memory when the last reference is dropped.
 */
struct inode_record {
        struct list_head backrefs;      /* list of struct inode_backref */
        unsigned int checked:1;         /* maybe_free_inode_rec() validates only checked, non-merging records */
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;       /* set when the inode item carries BTRFS_INODE_NODATASUM */
        int errors;                     /* I_ERR_* bits */

        /* Values read from the inode item by process_inode_item(). */
        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        /* Values accumulated during the walk and compared with the above. */
        u32 found_link;
        u64 found_size;
        u64 extent_start;
        u64 extent_end;
        struct rb_root holes;           /* tree of struct file_extent_hole */
        struct list_head orphan_extents; /* list of struct orphan_data_extent */

        u32 refs;
};
448
/* Error bits stored in inode_record.errors; decoded by print_inode_error(). */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
464
/*
 * One reference to a subvolume root, linked on root_record.backrefs
 * (presumably -- the list users are outside this part of the file).
 * The layout mirrors struct inode_backref: flags, an errors word and a
 * trailing name.
 */
struct root_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_back_ref:1;
        unsigned int found_forward_ref:1;
        unsigned int reachable:1;
        int errors;             /* presumably REF_ERR_* bits -- confirm */
        u64 ref_root;
        u64 dir;
        u64 index;
        u16 namelen;
        char name[0];           /* name bytes follow the structure */
};
479
/*
 * Per-root record kept in a cache tree (via "cache").
 * NOTE(review): not used in this part of the file; confirm field meanings
 * at the users.
 */
struct root_record {
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
487
/*
 * Cache-tree entry that maps a key to an arbitrary pointer.  In
 * get_inode_rec() the key (cache.start) is the inode number, cache.size
 * is 1 and data points at the struct inode_record.
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
492
/*
 * Per-node state used while walking a tree.  process_inode_item() works on
 * "current" and passes inode_cache to maybe_free_inode_rec().
 * NOTE(review): root_cache and refs are not used in this part of the file.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;  /* ptr_node entries keyed by inode number */
        struct inode_record *current;   /* inode record currently being filled in */
        u32 refs;
};
500
/* A block described by its start and size; not used in this part of the file. */
struct block_info {
        u64 start;
        u32 size;
};
505
/*
 * State of a tree walk: a cache tree of shared nodes plus one shared_node
 * slot per possible tree level.
 * NOTE(review): active_node and root_level look like indexes into nodes[];
 * confirm at the users, which are outside this part of the file.
 */
struct walk_control {
        struct cache_tree shared;
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        int active_node;
        int root_level;
};
512
/* A (root id, key) pair kept on a list; not used in this part of the file. */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
518
/* Forward declaration; the definition is not in this part of the file. */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
520
521 static void record_root_in_trans(struct btrfs_trans_handle *trans,
522                                  struct btrfs_root *root)
523 {
524         if (root->last_trans != trans->transid) {
525                 root->track_dirty = 1;
526                 root->last_trans = trans->transid;
527                 root->commit_root = root->node;
528                 extent_buffer_get(root->node);
529         }
530 }
531
532 static u8 imode_to_type(u32 imode)
533 {
534 #define S_SHIFT 12
535         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
536                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
537                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
538                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
539                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
540                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
541                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
542                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
543         };
544
545         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
546 #undef S_SHIFT
547 }
548
549 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
550 {
551         struct device_record *rec1;
552         struct device_record *rec2;
553
554         rec1 = rb_entry(node1, struct device_record, node);
555         rec2 = rb_entry(node2, struct device_record, node);
556         if (rec1->devid > rec2->devid)
557                 return -1;
558         else if (rec1->devid < rec2->devid)
559                 return 1;
560         else
561                 return 0;
562 }
563
564 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
565 {
566         struct inode_record *rec;
567         struct inode_backref *backref;
568         struct inode_backref *orig;
569         struct inode_backref *tmp;
570         struct orphan_data_extent *src_orphan;
571         struct orphan_data_extent *dst_orphan;
572         size_t size;
573         int ret;
574
575         rec = malloc(sizeof(*rec));
576         if (!rec)
577                 return ERR_PTR(-ENOMEM);
578         memcpy(rec, orig_rec, sizeof(*rec));
579         rec->refs = 1;
580         INIT_LIST_HEAD(&rec->backrefs);
581         INIT_LIST_HEAD(&rec->orphan_extents);
582         rec->holes = RB_ROOT;
583
584         list_for_each_entry(orig, &orig_rec->backrefs, list) {
585                 size = sizeof(*orig) + orig->namelen + 1;
586                 backref = malloc(size);
587                 if (!backref) {
588                         ret = -ENOMEM;
589                         goto cleanup;
590                 }
591                 memcpy(backref, orig, size);
592                 list_add_tail(&backref->list, &rec->backrefs);
593         }
594         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
595                 dst_orphan = malloc(sizeof(*dst_orphan));
596                 if (!dst_orphan) {
597                         ret = -ENOMEM;
598                         goto cleanup;
599                 }
600                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
601                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
602         }
603         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
604         BUG_ON(ret < 0);
605
606         return rec;
607
608 cleanup:
609         if (!list_empty(&rec->backrefs))
610                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
611                         list_del(&orig->list);
612                         free(orig);
613                 }
614
615         if (!list_empty(&rec->orphan_extents))
616                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
617                         list_del(&orig->list);
618                         free(orig);
619                 }
620
621         free(rec);
622
623         return ERR_PTR(ret);
624 }
625
626 static void print_orphan_data_extents(struct list_head *orphan_extents,
627                                       u64 objectid)
628 {
629         struct orphan_data_extent *orphan;
630
631         if (list_empty(orphan_extents))
632                 return;
633         printf("The following data extent is lost in tree %llu:\n",
634                objectid);
635         list_for_each_entry(orphan, orphan_extents, list) {
636                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
637                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
638                        orphan->disk_len);
639         }
640 }
641
642 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
643 {
644         u64 root_objectid = root->root_key.objectid;
645         int errors = rec->errors;
646
647         if (!errors)
648                 return;
649         /* reloc root errors, we print its corresponding fs root objectid*/
650         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
651                 root_objectid = root->root_key.offset;
652                 fprintf(stderr, "reloc");
653         }
654         fprintf(stderr, "root %llu inode %llu errors %x",
655                 (unsigned long long) root_objectid,
656                 (unsigned long long) rec->ino, rec->errors);
657
658         if (errors & I_ERR_NO_INODE_ITEM)
659                 fprintf(stderr, ", no inode item");
660         if (errors & I_ERR_NO_ORPHAN_ITEM)
661                 fprintf(stderr, ", no orphan item");
662         if (errors & I_ERR_DUP_INODE_ITEM)
663                 fprintf(stderr, ", dup inode item");
664         if (errors & I_ERR_DUP_DIR_INDEX)
665                 fprintf(stderr, ", dup dir index");
666         if (errors & I_ERR_ODD_DIR_ITEM)
667                 fprintf(stderr, ", odd dir item");
668         if (errors & I_ERR_ODD_FILE_EXTENT)
669                 fprintf(stderr, ", odd file extent");
670         if (errors & I_ERR_BAD_FILE_EXTENT)
671                 fprintf(stderr, ", bad file extent");
672         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
673                 fprintf(stderr, ", file extent overlap");
674         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
675                 fprintf(stderr, ", file extent discount");
676         if (errors & I_ERR_DIR_ISIZE_WRONG)
677                 fprintf(stderr, ", dir isize wrong");
678         if (errors & I_ERR_FILE_NBYTES_WRONG)
679                 fprintf(stderr, ", nbytes wrong");
680         if (errors & I_ERR_ODD_CSUM_ITEM)
681                 fprintf(stderr, ", odd csum item");
682         if (errors & I_ERR_SOME_CSUM_MISSING)
683                 fprintf(stderr, ", some csum missing");
684         if (errors & I_ERR_LINK_COUNT_WRONG)
685                 fprintf(stderr, ", link count wrong");
686         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
687                 fprintf(stderr, ", orphan file extent");
688         fprintf(stderr, "\n");
689         /* Print the orphan extents if needed */
690         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
691                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
692
693         /* Print the holes if needed */
694         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
695                 struct file_extent_hole *hole;
696                 struct rb_node *node;
697                 int found = 0;
698
699                 node = rb_first(&rec->holes);
700                 fprintf(stderr, "Found file extent holes:\n");
701                 while (node) {
702                         found = 1;
703                         hole = rb_entry(node, struct file_extent_hole, node);
704                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
705                                 hole->start, hole->len);
706                         node = rb_next(node);
707                 }
708                 if (!found)
709                         fprintf(stderr, "\tstart: 0, len: %llu\n",
710                                 round_up(rec->isize, root->sectorsize));
711         }
712 }
713
714 static void print_ref_error(int errors)
715 {
716         if (errors & REF_ERR_NO_DIR_ITEM)
717                 fprintf(stderr, ", no dir item");
718         if (errors & REF_ERR_NO_DIR_INDEX)
719                 fprintf(stderr, ", no dir index");
720         if (errors & REF_ERR_NO_INODE_REF)
721                 fprintf(stderr, ", no inode ref");
722         if (errors & REF_ERR_DUP_DIR_ITEM)
723                 fprintf(stderr, ", dup dir item");
724         if (errors & REF_ERR_DUP_DIR_INDEX)
725                 fprintf(stderr, ", dup dir index");
726         if (errors & REF_ERR_DUP_INODE_REF)
727                 fprintf(stderr, ", dup inode ref");
728         if (errors & REF_ERR_INDEX_UNMATCH)
729                 fprintf(stderr, ", index unmatch");
730         if (errors & REF_ERR_FILETYPE_UNMATCH)
731                 fprintf(stderr, ", filetype unmatch");
732         if (errors & REF_ERR_NAME_TOO_LONG)
733                 fprintf(stderr, ", name too long");
734         if (errors & REF_ERR_NO_ROOT_REF)
735                 fprintf(stderr, ", no root ref");
736         if (errors & REF_ERR_NO_ROOT_BACKREF)
737                 fprintf(stderr, ", no root backref");
738         if (errors & REF_ERR_DUP_ROOT_REF)
739                 fprintf(stderr, ", dup root ref");
740         if (errors & REF_ERR_DUP_ROOT_BACKREF)
741                 fprintf(stderr, ", dup root backref");
742         fprintf(stderr, "\n");
743 }
744
745 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
746                                           u64 ino, int mod)
747 {
748         struct ptr_node *node;
749         struct cache_extent *cache;
750         struct inode_record *rec = NULL;
751         int ret;
752
753         cache = lookup_cache_extent(inode_cache, ino, 1);
754         if (cache) {
755                 node = container_of(cache, struct ptr_node, cache);
756                 rec = node->data;
757                 if (mod && rec->refs > 1) {
758                         node->data = clone_inode_rec(rec);
759                         if (IS_ERR(node->data))
760                                 return node->data;
761                         rec->refs--;
762                         rec = node->data;
763                 }
764         } else if (mod) {
765                 rec = calloc(1, sizeof(*rec));
766                 if (!rec)
767                         return ERR_PTR(-ENOMEM);
768                 rec->ino = ino;
769                 rec->extent_start = (u64)-1;
770                 rec->refs = 1;
771                 INIT_LIST_HEAD(&rec->backrefs);
772                 INIT_LIST_HEAD(&rec->orphan_extents);
773                 rec->holes = RB_ROOT;
774
775                 node = malloc(sizeof(*node));
776                 if (!node) {
777                         free(rec);
778                         return ERR_PTR(-ENOMEM);
779                 }
780                 node->cache.start = ino;
781                 node->cache.size = 1;
782                 node->data = rec;
783
784                 if (ino == BTRFS_FREE_INO_OBJECTID)
785                         rec->found_link = 1;
786
787                 ret = insert_cache_extent(inode_cache, &node->cache);
788                 if (ret)
789                         return ERR_PTR(-EEXIST);
790         }
791         return rec;
792 }
793
794 static void free_orphan_data_extents(struct list_head *orphan_extents)
795 {
796         struct orphan_data_extent *orphan;
797
798         while (!list_empty(orphan_extents)) {
799                 orphan = list_entry(orphan_extents->next,
800                                     struct orphan_data_extent, list);
801                 list_del(&orphan->list);
802                 free(orphan);
803         }
804 }
805
806 static void free_inode_rec(struct inode_record *rec)
807 {
808         struct inode_backref *backref;
809
810         if (--rec->refs > 0)
811                 return;
812
813         while (!list_empty(&rec->backrefs)) {
814                 backref = list_entry(rec->backrefs.next,
815                                      struct inode_backref, list);
816                 list_del(&backref->list);
817                 free(backref);
818         }
819         free_orphan_data_extents(&rec->orphan_extents);
820         free_file_extent_holes(&rec->holes);
821         free(rec);
822 }
823
824 static int can_free_inode_rec(struct inode_record *rec)
825 {
826         if (!rec->errors && rec->checked && rec->found_inode_item &&
827             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
828                 return 1;
829         return 0;
830 }
831
832 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
833                                  struct inode_record *rec)
834 {
835         struct cache_extent *cache;
836         struct inode_backref *tmp, *backref;
837         struct ptr_node *node;
838         unsigned char filetype;
839
840         if (!rec->found_inode_item)
841                 return;
842
843         filetype = imode_to_type(rec->imode);
844         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
845                 if (backref->found_dir_item && backref->found_dir_index) {
846                         if (backref->filetype != filetype)
847                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
848                         if (!backref->errors && backref->found_inode_ref &&
849                             rec->nlink == rec->found_link) {
850                                 list_del(&backref->list);
851                                 free(backref);
852                         }
853                 }
854         }
855
856         if (!rec->checked || rec->merging)
857                 return;
858
859         if (S_ISDIR(rec->imode)) {
860                 if (rec->found_size != rec->isize)
861                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
862                 if (rec->found_file_extent)
863                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
864         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
865                 if (rec->found_dir_item)
866                         rec->errors |= I_ERR_ODD_DIR_ITEM;
867                 if (rec->found_size != rec->nbytes)
868                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
869                 if (rec->nlink > 0 && !no_holes &&
870                     (rec->extent_end < rec->isize ||
871                      first_extent_gap(&rec->holes) < rec->isize))
872                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
873         }
874
875         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
876                 if (rec->found_csum_item && rec->nodatasum)
877                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
878                 if (rec->some_csum_missing && !rec->nodatasum)
879                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
880         }
881
882         BUG_ON(rec->refs != 1);
883         if (can_free_inode_rec(rec)) {
884                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
885                 node = container_of(cache, struct ptr_node, cache);
886                 BUG_ON(node->data != rec);
887                 remove_cache_extent(inode_cache, &node->cache);
888                 free(node);
889                 free_inode_rec(rec);
890         }
891 }
892
893 static int check_orphan_item(struct btrfs_root *root, u64 ino)
894 {
895         struct btrfs_path path;
896         struct btrfs_key key;
897         int ret;
898
899         key.objectid = BTRFS_ORPHAN_OBJECTID;
900         key.type = BTRFS_ORPHAN_ITEM_KEY;
901         key.offset = ino;
902
903         btrfs_init_path(&path);
904         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
905         btrfs_release_path(&path);
906         if (ret > 0)
907                 ret = -ENOENT;
908         return ret;
909 }
910
911 static int process_inode_item(struct extent_buffer *eb,
912                               int slot, struct btrfs_key *key,
913                               struct shared_node *active_node)
914 {
915         struct inode_record *rec;
916         struct btrfs_inode_item *item;
917
918         rec = active_node->current;
919         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
920         if (rec->found_inode_item) {
921                 rec->errors |= I_ERR_DUP_INODE_ITEM;
922                 return 1;
923         }
924         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
925         rec->nlink = btrfs_inode_nlink(eb, item);
926         rec->isize = btrfs_inode_size(eb, item);
927         rec->nbytes = btrfs_inode_nbytes(eb, item);
928         rec->imode = btrfs_inode_mode(eb, item);
929         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
930                 rec->nodatasum = 1;
931         rec->found_inode_item = 1;
932         if (rec->nlink == 0)
933                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
934         maybe_free_inode_rec(&active_node->inode_cache, rec);
935         return 0;
936 }
937
938 static struct inode_backref *get_inode_backref(struct inode_record *rec,
939                                                 const char *name,
940                                                 int namelen, u64 dir)
941 {
942         struct inode_backref *backref;
943
944         list_for_each_entry(backref, &rec->backrefs, list) {
945                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
946                         break;
947                 if (backref->dir != dir || backref->namelen != namelen)
948                         continue;
949                 if (memcmp(name, backref->name, namelen))
950                         continue;
951                 return backref;
952         }
953
954         backref = malloc(sizeof(*backref) + namelen + 1);
955         memset(backref, 0, sizeof(*backref));
956         backref->dir = dir;
957         backref->namelen = namelen;
958         memcpy(backref->name, name, namelen);
959         backref->name[namelen] = '\0';
960         list_add_tail(&backref->list, &rec->backrefs);
961         return backref;
962 }
963
964 static int add_inode_backref(struct cache_tree *inode_cache,
965                              u64 ino, u64 dir, u64 index,
966                              const char *name, int namelen,
967                              int filetype, int itemtype, int errors)
968 {
969         struct inode_record *rec;
970         struct inode_backref *backref;
971
972         rec = get_inode_rec(inode_cache, ino, 1);
973         BUG_ON(IS_ERR(rec));
974         backref = get_inode_backref(rec, name, namelen, dir);
975         if (errors)
976                 backref->errors |= errors;
977         if (itemtype == BTRFS_DIR_INDEX_KEY) {
978                 if (backref->found_dir_index)
979                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
980                 if (backref->found_inode_ref && backref->index != index)
981                         backref->errors |= REF_ERR_INDEX_UNMATCH;
982                 if (backref->found_dir_item && backref->filetype != filetype)
983                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
984
985                 backref->index = index;
986                 backref->filetype = filetype;
987                 backref->found_dir_index = 1;
988         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
989                 rec->found_link++;
990                 if (backref->found_dir_item)
991                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
992                 if (backref->found_dir_index && backref->filetype != filetype)
993                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
994
995                 backref->filetype = filetype;
996                 backref->found_dir_item = 1;
997         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
998                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
999                 if (backref->found_inode_ref)
1000                         backref->errors |= REF_ERR_DUP_INODE_REF;
1001                 if (backref->found_dir_index && backref->index != index)
1002                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1003                 else
1004                         backref->index = index;
1005
1006                 backref->ref_type = itemtype;
1007                 backref->found_inode_ref = 1;
1008         } else {
1009                 BUG_ON(1);
1010         }
1011
1012         maybe_free_inode_rec(inode_cache, rec);
1013         return 0;
1014 }
1015
1016 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1017                             struct cache_tree *dst_cache)
1018 {
1019         struct inode_backref *backref;
1020         u32 dir_count = 0;
1021         int ret = 0;
1022
1023         dst->merging = 1;
1024         list_for_each_entry(backref, &src->backrefs, list) {
1025                 if (backref->found_dir_index) {
1026                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1027                                         backref->index, backref->name,
1028                                         backref->namelen, backref->filetype,
1029                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1030                 }
1031                 if (backref->found_dir_item) {
1032                         dir_count++;
1033                         add_inode_backref(dst_cache, dst->ino,
1034                                         backref->dir, 0, backref->name,
1035                                         backref->namelen, backref->filetype,
1036                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1037                 }
1038                 if (backref->found_inode_ref) {
1039                         add_inode_backref(dst_cache, dst->ino,
1040                                         backref->dir, backref->index,
1041                                         backref->name, backref->namelen, 0,
1042                                         backref->ref_type, backref->errors);
1043                 }
1044         }
1045
1046         if (src->found_dir_item)
1047                 dst->found_dir_item = 1;
1048         if (src->found_file_extent)
1049                 dst->found_file_extent = 1;
1050         if (src->found_csum_item)
1051                 dst->found_csum_item = 1;
1052         if (src->some_csum_missing)
1053                 dst->some_csum_missing = 1;
1054         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1055                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1056                 if (ret < 0)
1057                         return ret;
1058         }
1059
1060         BUG_ON(src->found_link < dir_count);
1061         dst->found_link += src->found_link - dir_count;
1062         dst->found_size += src->found_size;
1063         if (src->extent_start != (u64)-1) {
1064                 if (dst->extent_start == (u64)-1) {
1065                         dst->extent_start = src->extent_start;
1066                         dst->extent_end = src->extent_end;
1067                 } else {
1068                         if (dst->extent_end > src->extent_start)
1069                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1070                         else if (dst->extent_end < src->extent_start) {
1071                                 ret = add_file_extent_hole(&dst->holes,
1072                                         dst->extent_end,
1073                                         src->extent_start - dst->extent_end);
1074                         }
1075                         if (dst->extent_end < src->extent_end)
1076                                 dst->extent_end = src->extent_end;
1077                 }
1078         }
1079
1080         dst->errors |= src->errors;
1081         if (src->found_inode_item) {
1082                 if (!dst->found_inode_item) {
1083                         dst->nlink = src->nlink;
1084                         dst->isize = src->isize;
1085                         dst->nbytes = src->nbytes;
1086                         dst->imode = src->imode;
1087                         dst->nodatasum = src->nodatasum;
1088                         dst->found_inode_item = 1;
1089                 } else {
1090                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1091                 }
1092         }
1093         dst->merging = 0;
1094
1095         return 0;
1096 }
1097
1098 static int splice_shared_node(struct shared_node *src_node,
1099                               struct shared_node *dst_node)
1100 {
1101         struct cache_extent *cache;
1102         struct ptr_node *node, *ins;
1103         struct cache_tree *src, *dst;
1104         struct inode_record *rec, *conflict;
1105         u64 current_ino = 0;
1106         int splice = 0;
1107         int ret;
1108
1109         if (--src_node->refs == 0)
1110                 splice = 1;
1111         if (src_node->current)
1112                 current_ino = src_node->current->ino;
1113
1114         src = &src_node->root_cache;
1115         dst = &dst_node->root_cache;
1116 again:
1117         cache = search_cache_extent(src, 0);
1118         while (cache) {
1119                 node = container_of(cache, struct ptr_node, cache);
1120                 rec = node->data;
1121                 cache = next_cache_extent(cache);
1122
1123                 if (splice) {
1124                         remove_cache_extent(src, &node->cache);
1125                         ins = node;
1126                 } else {
1127                         ins = malloc(sizeof(*ins));
1128                         ins->cache.start = node->cache.start;
1129                         ins->cache.size = node->cache.size;
1130                         ins->data = rec;
1131                         rec->refs++;
1132                 }
1133                 ret = insert_cache_extent(dst, &ins->cache);
1134                 if (ret == -EEXIST) {
1135                         conflict = get_inode_rec(dst, rec->ino, 1);
1136                         BUG_ON(IS_ERR(conflict));
1137                         merge_inode_recs(rec, conflict, dst);
1138                         if (rec->checked) {
1139                                 conflict->checked = 1;
1140                                 if (dst_node->current == conflict)
1141                                         dst_node->current = NULL;
1142                         }
1143                         maybe_free_inode_rec(dst, conflict);
1144                         free_inode_rec(rec);
1145                         free(ins);
1146                 } else {
1147                         BUG_ON(ret);
1148                 }
1149         }
1150
1151         if (src == &src_node->root_cache) {
1152                 src = &src_node->inode_cache;
1153                 dst = &dst_node->inode_cache;
1154                 goto again;
1155         }
1156
1157         if (current_ino > 0 && (!dst_node->current ||
1158             current_ino > dst_node->current->ino)) {
1159                 if (dst_node->current) {
1160                         dst_node->current->checked = 1;
1161                         maybe_free_inode_rec(dst, dst_node->current);
1162                 }
1163                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1164                 BUG_ON(IS_ERR(dst_node->current));
1165         }
1166         return 0;
1167 }
1168
1169 static void free_inode_ptr(struct cache_extent *cache)
1170 {
1171         struct ptr_node *node;
1172         struct inode_record *rec;
1173
1174         node = container_of(cache, struct ptr_node, cache);
1175         rec = node->data;
1176         free_inode_rec(rec);
1177         free(node);
1178 }
1179
1180 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1181
1182 static struct shared_node *find_shared_node(struct cache_tree *shared,
1183                                             u64 bytenr)
1184 {
1185         struct cache_extent *cache;
1186         struct shared_node *node;
1187
1188         cache = lookup_cache_extent(shared, bytenr, 1);
1189         if (cache) {
1190                 node = container_of(cache, struct shared_node, cache);
1191                 return node;
1192         }
1193         return NULL;
1194 }
1195
1196 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1197 {
1198         int ret;
1199         struct shared_node *node;
1200
1201         node = calloc(1, sizeof(*node));
1202         if (!node)
1203                 return -ENOMEM;
1204         node->cache.start = bytenr;
1205         node->cache.size = 1;
1206         cache_tree_init(&node->root_cache);
1207         cache_tree_init(&node->inode_cache);
1208         node->refs = refs;
1209
1210         ret = insert_cache_extent(shared, &node->cache);
1211
1212         return ret;
1213 }
1214
1215 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1216                              struct walk_control *wc, int level)
1217 {
1218         struct shared_node *node;
1219         struct shared_node *dest;
1220         int ret;
1221
1222         if (level == wc->active_node)
1223                 return 0;
1224
1225         BUG_ON(wc->active_node <= level);
1226         node = find_shared_node(&wc->shared, bytenr);
1227         if (!node) {
1228                 ret = add_shared_node(&wc->shared, bytenr, refs);
1229                 BUG_ON(ret);
1230                 node = find_shared_node(&wc->shared, bytenr);
1231                 wc->nodes[level] = node;
1232                 wc->active_node = level;
1233                 return 0;
1234         }
1235
1236         if (wc->root_level == wc->active_node &&
1237             btrfs_root_refs(&root->root_item) == 0) {
1238                 if (--node->refs == 0) {
1239                         free_inode_recs_tree(&node->root_cache);
1240                         free_inode_recs_tree(&node->inode_cache);
1241                         remove_cache_extent(&wc->shared, &node->cache);
1242                         free(node);
1243                 }
1244                 return 1;
1245         }
1246
1247         dest = wc->nodes[wc->active_node];
1248         splice_shared_node(node, dest);
1249         if (node->refs == 0) {
1250                 remove_cache_extent(&wc->shared, &node->cache);
1251                 free(node);
1252         }
1253         return 1;
1254 }
1255
1256 static int leave_shared_node(struct btrfs_root *root,
1257                              struct walk_control *wc, int level)
1258 {
1259         struct shared_node *node;
1260         struct shared_node *dest;
1261         int i;
1262
1263         if (level == wc->root_level)
1264                 return 0;
1265
1266         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1267                 if (wc->nodes[i])
1268                         break;
1269         }
1270         BUG_ON(i >= BTRFS_MAX_LEVEL);
1271
1272         node = wc->nodes[wc->active_node];
1273         wc->nodes[wc->active_node] = NULL;
1274         wc->active_node = i;
1275
1276         dest = wc->nodes[wc->active_node];
1277         if (wc->active_node < wc->root_level ||
1278             btrfs_root_refs(&root->root_item) > 0) {
1279                 BUG_ON(node->refs <= 1);
1280                 splice_shared_node(node, dest);
1281         } else {
1282                 BUG_ON(node->refs < 2);
1283                 node->refs--;
1284         }
1285         return 0;
1286 }
1287
1288 /*
1289  * Returns:
1290  * < 0 - on error
1291  * 1   - if the root with id child_root_id is a child of root parent_root_id
1292  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1293  *       has other root(s) as parent(s)
1294  * 2   - if the root child_root_id doesn't have any parent roots
1295  */
1296 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1297                          u64 child_root_id)
1298 {
1299         struct btrfs_path path;
1300         struct btrfs_key key;
1301         struct extent_buffer *leaf;
1302         int has_parent = 0;
1303         int ret;
1304
1305         btrfs_init_path(&path);
1306
1307         key.objectid = parent_root_id;
1308         key.type = BTRFS_ROOT_REF_KEY;
1309         key.offset = child_root_id;
1310         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1311                                 0, 0);
1312         if (ret < 0)
1313                 return ret;
1314         btrfs_release_path(&path);
1315         if (!ret)
1316                 return 1;
1317
1318         key.objectid = child_root_id;
1319         key.type = BTRFS_ROOT_BACKREF_KEY;
1320         key.offset = 0;
1321         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1322                                 0, 0);
1323         if (ret < 0)
1324                 goto out;
1325
1326         while (1) {
1327                 leaf = path.nodes[0];
1328                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1329                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1330                         if (ret)
1331                                 break;
1332                         leaf = path.nodes[0];
1333                 }
1334
1335                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1336                 if (key.objectid != child_root_id ||
1337                     key.type != BTRFS_ROOT_BACKREF_KEY)
1338                         break;
1339
1340                 has_parent = 1;
1341
1342                 if (key.offset == parent_root_id) {
1343                         btrfs_release_path(&path);
1344                         return 1;
1345                 }
1346
1347                 path.slots[0]++;
1348         }
1349 out:
1350         btrfs_release_path(&path);
1351         if (ret < 0)
1352                 return ret;
1353         return has_parent ? 0 : 2;
1354 }
1355
1356 static int process_dir_item(struct btrfs_root *root,
1357                             struct extent_buffer *eb,
1358                             int slot, struct btrfs_key *key,
1359                             struct shared_node *active_node)
1360 {
1361         u32 total;
1362         u32 cur = 0;
1363         u32 len;
1364         u32 name_len;
1365         u32 data_len;
1366         int error;
1367         int nritems = 0;
1368         int filetype;
1369         struct btrfs_dir_item *di;
1370         struct inode_record *rec;
1371         struct cache_tree *root_cache;
1372         struct cache_tree *inode_cache;
1373         struct btrfs_key location;
1374         char namebuf[BTRFS_NAME_LEN];
1375
1376         root_cache = &active_node->root_cache;
1377         inode_cache = &active_node->inode_cache;
1378         rec = active_node->current;
1379         rec->found_dir_item = 1;
1380
1381         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1382         total = btrfs_item_size_nr(eb, slot);
1383         while (cur < total) {
1384                 nritems++;
1385                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1386                 name_len = btrfs_dir_name_len(eb, di);
1387                 data_len = btrfs_dir_data_len(eb, di);
1388                 filetype = btrfs_dir_type(eb, di);
1389
1390                 rec->found_size += name_len;
1391                 if (name_len <= BTRFS_NAME_LEN) {
1392                         len = name_len;
1393                         error = 0;
1394                 } else {
1395                         len = BTRFS_NAME_LEN;
1396                         error = REF_ERR_NAME_TOO_LONG;
1397                 }
1398                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1399
1400                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1401                         add_inode_backref(inode_cache, location.objectid,
1402                                           key->objectid, key->offset, namebuf,
1403                                           len, filetype, key->type, error);
1404                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1405                         add_inode_backref(root_cache, location.objectid,
1406                                           key->objectid, key->offset,
1407                                           namebuf, len, filetype,
1408                                           key->type, error);
1409                 } else {
1410                         fprintf(stderr, "invalid location in dir item %u\n",
1411                                 location.type);
1412                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1413                                           key->objectid, key->offset, namebuf,
1414                                           len, filetype, key->type, error);
1415                 }
1416
1417                 len = sizeof(*di) + name_len + data_len;
1418                 di = (struct btrfs_dir_item *)((char *)di + len);
1419                 cur += len;
1420         }
1421         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1422                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1423
1424         return 0;
1425 }
1426
1427 static int process_inode_ref(struct extent_buffer *eb,
1428                              int slot, struct btrfs_key *key,
1429                              struct shared_node *active_node)
1430 {
1431         u32 total;
1432         u32 cur = 0;
1433         u32 len;
1434         u32 name_len;
1435         u64 index;
1436         int error;
1437         struct cache_tree *inode_cache;
1438         struct btrfs_inode_ref *ref;
1439         char namebuf[BTRFS_NAME_LEN];
1440
1441         inode_cache = &active_node->inode_cache;
1442
1443         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1444         total = btrfs_item_size_nr(eb, slot);
1445         while (cur < total) {
1446                 name_len = btrfs_inode_ref_name_len(eb, ref);
1447                 index = btrfs_inode_ref_index(eb, ref);
1448                 if (name_len <= BTRFS_NAME_LEN) {
1449                         len = name_len;
1450                         error = 0;
1451                 } else {
1452                         len = BTRFS_NAME_LEN;
1453                         error = REF_ERR_NAME_TOO_LONG;
1454                 }
1455                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1456                 add_inode_backref(inode_cache, key->objectid, key->offset,
1457                                   index, namebuf, len, 0, key->type, error);
1458
1459                 len = sizeof(*ref) + name_len;
1460                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1461                 cur += len;
1462         }
1463         return 0;
1464 }
1465
1466 static int process_inode_extref(struct extent_buffer *eb,
1467                                 int slot, struct btrfs_key *key,
1468                                 struct shared_node *active_node)
1469 {
1470         u32 total;
1471         u32 cur = 0;
1472         u32 len;
1473         u32 name_len;
1474         u64 index;
1475         u64 parent;
1476         int error;
1477         struct cache_tree *inode_cache;
1478         struct btrfs_inode_extref *extref;
1479         char namebuf[BTRFS_NAME_LEN];
1480
1481         inode_cache = &active_node->inode_cache;
1482
1483         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1484         total = btrfs_item_size_nr(eb, slot);
1485         while (cur < total) {
1486                 name_len = btrfs_inode_extref_name_len(eb, extref);
1487                 index = btrfs_inode_extref_index(eb, extref);
1488                 parent = btrfs_inode_extref_parent(eb, extref);
1489                 if (name_len <= BTRFS_NAME_LEN) {
1490                         len = name_len;
1491                         error = 0;
1492                 } else {
1493                         len = BTRFS_NAME_LEN;
1494                         error = REF_ERR_NAME_TOO_LONG;
1495                 }
1496                 read_extent_buffer(eb, namebuf,
1497                                    (unsigned long)(extref + 1), len);
1498                 add_inode_backref(inode_cache, key->objectid, parent,
1499                                   index, namebuf, len, 0, key->type, error);
1500
1501                 len = sizeof(*extref) + name_len;
1502                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1503                 cur += len;
1504         }
1505         return 0;
1506
1507 }
1508
1509 static int count_csum_range(struct btrfs_root *root, u64 start,
1510                             u64 len, u64 *found)
1511 {
1512         struct btrfs_key key;
1513         struct btrfs_path path;
1514         struct extent_buffer *leaf;
1515         int ret;
1516         size_t size;
1517         *found = 0;
1518         u64 csum_end;
1519         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1520
1521         btrfs_init_path(&path);
1522
1523         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1524         key.offset = start;
1525         key.type = BTRFS_EXTENT_CSUM_KEY;
1526
1527         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1528                                 &key, &path, 0, 0);
1529         if (ret < 0)
1530                 goto out;
1531         if (ret > 0 && path.slots[0] > 0) {
1532                 leaf = path.nodes[0];
1533                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1534                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1535                     key.type == BTRFS_EXTENT_CSUM_KEY)
1536                         path.slots[0]--;
1537         }
1538
1539         while (len > 0) {
1540                 leaf = path.nodes[0];
1541                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1542                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1543                         if (ret > 0)
1544                                 break;
1545                         else if (ret < 0)
1546                                 goto out;
1547                         leaf = path.nodes[0];
1548                 }
1549
1550                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1551                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1552                     key.type != BTRFS_EXTENT_CSUM_KEY)
1553                         break;
1554
1555                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1556                 if (key.offset >= start + len)
1557                         break;
1558
1559                 if (key.offset > start)
1560                         start = key.offset;
1561
1562                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1563                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1564                 if (csum_end > start) {
1565                         size = min(csum_end - start, len);
1566                         len -= size;
1567                         start += size;
1568                         *found += size;
1569                 }
1570
1571                 path.slots[0]++;
1572         }
1573 out:
1574         btrfs_release_path(&path);
1575         if (ret < 0)
1576                 return ret;
1577         return 0;
1578 }
1579
1580 static int process_file_extent(struct btrfs_root *root,
1581                                 struct extent_buffer *eb,
1582                                 int slot, struct btrfs_key *key,
1583                                 struct shared_node *active_node)
1584 {
1585         struct inode_record *rec;
1586         struct btrfs_file_extent_item *fi;
1587         u64 num_bytes = 0;
1588         u64 disk_bytenr = 0;
1589         u64 extent_offset = 0;
1590         u64 mask = root->sectorsize - 1;
1591         int extent_type;
1592         int ret;
1593
1594         rec = active_node->current;
1595         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1596         rec->found_file_extent = 1;
1597
1598         if (rec->extent_start == (u64)-1) {
1599                 rec->extent_start = key->offset;
1600                 rec->extent_end = key->offset;
1601         }
1602
1603         if (rec->extent_end > key->offset)
1604                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1605         else if (rec->extent_end < key->offset) {
1606                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1607                                            key->offset - rec->extent_end);
1608                 if (ret < 0)
1609                         return ret;
1610         }
1611
1612         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1613         extent_type = btrfs_file_extent_type(eb, fi);
1614
1615         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1616                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1617                 if (num_bytes == 0)
1618                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1619                 rec->found_size += num_bytes;
1620                 num_bytes = (num_bytes + mask) & ~mask;
1621         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1622                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1623                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1624                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1625                 extent_offset = btrfs_file_extent_offset(eb, fi);
1626                 if (num_bytes == 0 || (num_bytes & mask))
1627                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1628                 if (num_bytes + extent_offset >
1629                     btrfs_file_extent_ram_bytes(eb, fi))
1630                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1631                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1632                     (btrfs_file_extent_compression(eb, fi) ||
1633                      btrfs_file_extent_encryption(eb, fi) ||
1634                      btrfs_file_extent_other_encoding(eb, fi)))
1635                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1636                 if (disk_bytenr > 0)
1637                         rec->found_size += num_bytes;
1638         } else {
1639                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1640         }
1641         rec->extent_end = key->offset + num_bytes;
1642
1643         /*
1644          * The data reloc tree will copy full extents into its inode and then
1645          * copy the corresponding csums.  Because the extent it copied could be
1646          * a preallocated extent that hasn't been written to yet there may be no
1647          * csums to copy, ergo we won't have csums for our file extent.  This is
1648          * ok so just don't bother checking csums if the inode belongs to the
1649          * data reloc tree.
1650          */
1651         if (disk_bytenr > 0 &&
1652             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1653                 u64 found;
1654                 if (btrfs_file_extent_compression(eb, fi))
1655                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1656                 else
1657                         disk_bytenr += extent_offset;
1658
1659                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1660                 if (ret < 0)
1661                         return ret;
1662                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1663                         if (found > 0)
1664                                 rec->found_csum_item = 1;
1665                         if (found < num_bytes)
1666                                 rec->some_csum_missing = 1;
1667                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1668                         if (found > 0)
1669                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1670                 }
1671         }
1672         return 0;
1673 }
1674
1675 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1676                             struct walk_control *wc)
1677 {
1678         struct btrfs_key key;
1679         u32 nritems;
1680         int i;
1681         int ret = 0;
1682         struct cache_tree *inode_cache;
1683         struct shared_node *active_node;
1684
1685         if (wc->root_level == wc->active_node &&
1686             btrfs_root_refs(&root->root_item) == 0)
1687                 return 0;
1688
1689         active_node = wc->nodes[wc->active_node];
1690         inode_cache = &active_node->inode_cache;
1691         nritems = btrfs_header_nritems(eb);
1692         for (i = 0; i < nritems; i++) {
1693                 btrfs_item_key_to_cpu(eb, &key, i);
1694
1695                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1696                         continue;
1697                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1698                         continue;
1699
1700                 if (active_node->current == NULL ||
1701                     active_node->current->ino < key.objectid) {
1702                         if (active_node->current) {
1703                                 active_node->current->checked = 1;
1704                                 maybe_free_inode_rec(inode_cache,
1705                                                      active_node->current);
1706                         }
1707                         active_node->current = get_inode_rec(inode_cache,
1708                                                              key.objectid, 1);
1709                         BUG_ON(IS_ERR(active_node->current));
1710                 }
1711                 switch (key.type) {
1712                 case BTRFS_DIR_ITEM_KEY:
1713                 case BTRFS_DIR_INDEX_KEY:
1714                         ret = process_dir_item(root, eb, i, &key, active_node);
1715                         break;
1716                 case BTRFS_INODE_REF_KEY:
1717                         ret = process_inode_ref(eb, i, &key, active_node);
1718                         break;
1719                 case BTRFS_INODE_EXTREF_KEY:
1720                         ret = process_inode_extref(eb, i, &key, active_node);
1721                         break;
1722                 case BTRFS_INODE_ITEM_KEY:
1723                         ret = process_inode_item(eb, i, &key, active_node);
1724                         break;
1725                 case BTRFS_EXTENT_DATA_KEY:
1726                         ret = process_file_extent(root, eb, i, &key,
1727                                                   active_node);
1728                         break;
1729                 default:
1730                         break;
1731                 };
1732         }
1733         return ret;
1734 }
1735
1736 static void reada_walk_down(struct btrfs_root *root,
1737                             struct extent_buffer *node, int slot)
1738 {
1739         u64 bytenr;
1740         u64 ptr_gen;
1741         u32 nritems;
1742         u32 blocksize;
1743         int i;
1744         int level;
1745
1746         level = btrfs_header_level(node);
1747         if (level != 1)
1748                 return;
1749
1750         nritems = btrfs_header_nritems(node);
1751         blocksize = btrfs_level_size(root, level - 1);
1752         for (i = slot; i < nritems; i++) {
1753                 bytenr = btrfs_node_blockptr(node, i);
1754                 ptr_gen = btrfs_node_ptr_generation(node, i);
1755                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1756         }
1757 }
1758
1759 /*
1760  * Check the child node/leaf by the following condition:
1761  * 1. the first item key of the node/leaf should be the same with the one
1762  *    in parent.
1763  * 2. block in parent node should match the child node/leaf.
1764  * 3. generation of parent node and child's header should be consistent.
1765  *
1766  * Or the child node/leaf pointed by the key in parent is not valid.
1767  *
1768  * We hope to check leaf owner too, but since subvol may share leaves,
1769  * which makes leaf owner check not so strong, key check should be
1770  * sufficient enough for that case.
1771  */
1772 static int check_child_node(struct btrfs_root *root,
1773                             struct extent_buffer *parent, int slot,
1774                             struct extent_buffer *child)
1775 {
1776         struct btrfs_key parent_key;
1777         struct btrfs_key child_key;
1778         int ret = 0;
1779
1780         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1781         if (btrfs_header_level(child) == 0)
1782                 btrfs_item_key_to_cpu(child, &child_key, 0);
1783         else
1784                 btrfs_node_key_to_cpu(child, &child_key, 0);
1785
1786         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1787                 ret = -EINVAL;
1788                 fprintf(stderr,
1789                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1790                         parent_key.objectid, parent_key.type, parent_key.offset,
1791                         child_key.objectid, child_key.type, child_key.offset);
1792         }
1793         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1794                 ret = -EINVAL;
1795                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1796                         btrfs_node_blockptr(parent, slot),
1797                         btrfs_header_bytenr(child));
1798         }
1799         if (btrfs_node_ptr_generation(parent, slot) !=
1800             btrfs_header_generation(child)) {
1801                 ret = -EINVAL;
1802                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1803                         btrfs_header_generation(child),
1804                         btrfs_node_ptr_generation(parent, slot));
1805         }
1806         return ret;
1807 }
1808
1809 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1810                           struct walk_control *wc, int *level)
1811 {
1812         enum btrfs_tree_block_status status;
1813         u64 bytenr;
1814         u64 ptr_gen;
1815         struct extent_buffer *next;
1816         struct extent_buffer *cur;
1817         u32 blocksize;
1818         int ret, err = 0;
1819         u64 refs;
1820
1821         WARN_ON(*level < 0);
1822         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1823         ret = btrfs_lookup_extent_info(NULL, root,
1824                                        path->nodes[*level]->start,
1825                                        *level, 1, &refs, NULL);
1826         if (ret < 0) {
1827                 err = ret;
1828                 goto out;
1829         }
1830
1831         if (refs > 1) {
1832                 ret = enter_shared_node(root, path->nodes[*level]->start,
1833                                         refs, wc, *level);
1834                 if (ret > 0) {
1835                         err = ret;
1836                         goto out;
1837                 }
1838         }
1839
1840         while (*level >= 0) {
1841                 WARN_ON(*level < 0);
1842                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1843                 cur = path->nodes[*level];
1844
1845                 if (btrfs_header_level(cur) != *level)
1846                         WARN_ON(1);
1847
1848                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1849                         break;
1850                 if (*level == 0) {
1851                         ret = process_one_leaf(root, cur, wc);
1852                         if (ret < 0)
1853                                 err = ret;
1854                         break;
1855                 }
1856                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1857                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1858                 blocksize = btrfs_level_size(root, *level - 1);
1859                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1860                                                1, &refs, NULL);
1861                 if (ret < 0)
1862                         refs = 0;
1863
1864                 if (refs > 1) {
1865                         ret = enter_shared_node(root, bytenr, refs,
1866                                                 wc, *level - 1);
1867                         if (ret > 0) {
1868                                 path->slots[*level]++;
1869                                 continue;
1870                         }
1871                 }
1872
1873                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1874                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1875                         free_extent_buffer(next);
1876                         reada_walk_down(root, cur, path->slots[*level]);
1877                         next = read_tree_block(root, bytenr, blocksize,
1878                                                ptr_gen);
1879                         if (!extent_buffer_uptodate(next)) {
1880                                 struct btrfs_key node_key;
1881
1882                                 btrfs_node_key_to_cpu(path->nodes[*level],
1883                                                       &node_key,
1884                                                       path->slots[*level]);
1885                                 btrfs_add_corrupt_extent_record(root->fs_info,
1886                                                 &node_key,
1887                                                 path->nodes[*level]->start,
1888                                                 root->leafsize, *level);
1889                                 err = -EIO;
1890                                 goto out;
1891                         }
1892                 }
1893
1894                 ret = check_child_node(root, cur, path->slots[*level], next);
1895                 if (ret) {
1896                         err = ret;
1897                         goto out;
1898                 }
1899
1900                 if (btrfs_is_leaf(next))
1901                         status = btrfs_check_leaf(root, NULL, next);
1902                 else
1903                         status = btrfs_check_node(root, NULL, next);
1904                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1905                         free_extent_buffer(next);
1906                         err = -EIO;
1907                         goto out;
1908                 }
1909
1910                 *level = *level - 1;
1911                 free_extent_buffer(path->nodes[*level]);
1912                 path->nodes[*level] = next;
1913                 path->slots[*level] = 0;
1914         }
1915 out:
1916         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1917         return err;
1918 }
1919
1920 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1921                         struct walk_control *wc, int *level)
1922 {
1923         int i;
1924         struct extent_buffer *leaf;
1925
1926         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1927                 leaf = path->nodes[i];
1928                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1929                         path->slots[i]++;
1930                         *level = i;
1931                         return 0;
1932                 } else {
1933                         free_extent_buffer(path->nodes[*level]);
1934                         path->nodes[*level] = NULL;
1935                         BUG_ON(*level > wc->active_node);
1936                         if (*level == wc->active_node)
1937                                 leave_shared_node(root, wc, *level);
1938                         *level = i + 1;
1939                 }
1940         }
1941         return 1;
1942 }
1943
1944 static int check_root_dir(struct inode_record *rec)
1945 {
1946         struct inode_backref *backref;
1947         int ret = -1;
1948
1949         if (!rec->found_inode_item || rec->errors)
1950                 goto out;
1951         if (rec->nlink != 1 || rec->found_link != 0)
1952                 goto out;
1953         if (list_empty(&rec->backrefs))
1954                 goto out;
1955         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1956         if (!backref->found_inode_ref)
1957                 goto out;
1958         if (backref->index != 0 || backref->namelen != 2 ||
1959             memcmp(backref->name, "..", 2))
1960                 goto out;
1961         if (backref->found_dir_index || backref->found_dir_item)
1962                 goto out;
1963         ret = 0;
1964 out:
1965         return ret;
1966 }
1967
1968 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1969                               struct btrfs_root *root, struct btrfs_path *path,
1970                               struct inode_record *rec)
1971 {
1972         struct btrfs_inode_item *ei;
1973         struct btrfs_key key;
1974         int ret;
1975
1976         key.objectid = rec->ino;
1977         key.type = BTRFS_INODE_ITEM_KEY;
1978         key.offset = (u64)-1;
1979
1980         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1981         if (ret < 0)
1982                 goto out;
1983         if (ret) {
1984                 if (!path->slots[0]) {
1985                         ret = -ENOENT;
1986                         goto out;
1987                 }
1988                 path->slots[0]--;
1989                 ret = 0;
1990         }
1991         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1992         if (key.objectid != rec->ino) {
1993                 ret = -ENOENT;
1994                 goto out;
1995         }
1996
1997         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1998                             struct btrfs_inode_item);
1999         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2000         btrfs_mark_buffer_dirty(path->nodes[0]);
2001         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2002         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2003                root->root_key.objectid);
2004 out:
2005         btrfs_release_path(path);
2006         return ret;
2007 }
2008
2009 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2010                                     struct btrfs_root *root,
2011                                     struct btrfs_path *path,
2012                                     struct inode_record *rec)
2013 {
2014         int ret;
2015
2016         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2017         btrfs_release_path(path);
2018         if (!ret)
2019                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2020         return ret;
2021 }
2022
2023 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2024                                struct btrfs_root *root,
2025                                struct btrfs_path *path,
2026                                struct inode_record *rec)
2027 {
2028         struct btrfs_inode_item *ei;
2029         struct btrfs_key key;
2030         int ret = 0;
2031
2032         key.objectid = rec->ino;
2033         key.type = BTRFS_INODE_ITEM_KEY;
2034         key.offset = 0;
2035
2036         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2037         if (ret) {
2038                 if (ret > 0)
2039                         ret = -ENOENT;
2040                 goto out;
2041         }
2042
2043         /* Since ret == 0, no need to check anything */
2044         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2045                             struct btrfs_inode_item);
2046         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2047         btrfs_mark_buffer_dirty(path->nodes[0]);
2048         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2049         printf("reset nbytes for ino %llu root %llu\n",
2050                rec->ino, root->root_key.objectid);
2051 out:
2052         btrfs_release_path(path);
2053         return ret;
2054 }
2055
2056 static int add_missing_dir_index(struct btrfs_root *root,
2057                                  struct cache_tree *inode_cache,
2058                                  struct inode_record *rec,
2059                                  struct inode_backref *backref)
2060 {
2061         struct btrfs_path *path;
2062         struct btrfs_trans_handle *trans;
2063         struct btrfs_dir_item *dir_item;
2064         struct extent_buffer *leaf;
2065         struct btrfs_key key;
2066         struct btrfs_disk_key disk_key;
2067         struct inode_record *dir_rec;
2068         unsigned long name_ptr;
2069         u32 data_size = sizeof(*dir_item) + backref->namelen;
2070         int ret;
2071
2072         path = btrfs_alloc_path();
2073         if (!path)
2074                 return -ENOMEM;
2075
2076         trans = btrfs_start_transaction(root, 1);
2077         if (IS_ERR(trans)) {
2078                 btrfs_free_path(path);
2079                 return PTR_ERR(trans);
2080         }
2081
2082         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2083                 (unsigned long long)rec->ino);
2084         key.objectid = backref->dir;
2085         key.type = BTRFS_DIR_INDEX_KEY;
2086         key.offset = backref->index;
2087
2088         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2089         BUG_ON(ret);
2090
2091         leaf = path->nodes[0];
2092         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2093
2094         disk_key.objectid = cpu_to_le64(rec->ino);
2095         disk_key.type = BTRFS_INODE_ITEM_KEY;
2096         disk_key.offset = 0;
2097
2098         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2099         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2100         btrfs_set_dir_data_len(leaf, dir_item, 0);
2101         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2102         name_ptr = (unsigned long)(dir_item + 1);
2103         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2104         btrfs_mark_buffer_dirty(leaf);
2105         btrfs_free_path(path);
2106         btrfs_commit_transaction(trans, root);
2107
2108         backref->found_dir_index = 1;
2109         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2110         BUG_ON(IS_ERR(dir_rec));
2111         if (!dir_rec)
2112                 return 0;
2113         dir_rec->found_size += backref->namelen;
2114         if (dir_rec->found_size == dir_rec->isize &&
2115             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2116                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2117         if (dir_rec->found_size != dir_rec->isize)
2118                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2119
2120         return 0;
2121 }
2122
2123 static int delete_dir_index(struct btrfs_root *root,
2124                             struct cache_tree *inode_cache,
2125                             struct inode_record *rec,
2126                             struct inode_backref *backref)
2127 {
2128         struct btrfs_trans_handle *trans;
2129         struct btrfs_dir_item *di;
2130         struct btrfs_path *path;
2131         int ret = 0;
2132
2133         path = btrfs_alloc_path();
2134         if (!path)
2135                 return -ENOMEM;
2136
2137         trans = btrfs_start_transaction(root, 1);
2138         if (IS_ERR(trans)) {
2139                 btrfs_free_path(path);
2140                 return PTR_ERR(trans);
2141         }
2142
2143
2144         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2145                 (unsigned long long)backref->dir,
2146                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2147                 (unsigned long long)root->objectid);
2148
2149         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2150                                     backref->name, backref->namelen,
2151                                     backref->index, -1);
2152         if (IS_ERR(di)) {
2153                 ret = PTR_ERR(di);
2154                 btrfs_free_path(path);
2155                 btrfs_commit_transaction(trans, root);
2156                 if (ret == -ENOENT)
2157                         return 0;
2158                 return ret;
2159         }
2160
2161         if (!di)
2162                 ret = btrfs_del_item(trans, root, path);
2163         else
2164                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2165         BUG_ON(ret);
2166         btrfs_free_path(path);
2167         btrfs_commit_transaction(trans, root);
2168         return ret;
2169 }
2170
2171 static int create_inode_item(struct btrfs_root *root,
2172                              struct inode_record *rec,
2173                              struct inode_backref *backref, int root_dir)
2174 {
2175         struct btrfs_trans_handle *trans;
2176         struct btrfs_inode_item inode_item;
2177         time_t now = time(NULL);
2178         int ret;
2179
2180         trans = btrfs_start_transaction(root, 1);
2181         if (IS_ERR(trans)) {
2182                 ret = PTR_ERR(trans);
2183                 return ret;
2184         }
2185
2186         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2187                 "be incomplete, please check permissions and content after "
2188                 "the fsck completes.\n", (unsigned long long)root->objectid,
2189                 (unsigned long long)rec->ino);
2190
2191         memset(&inode_item, 0, sizeof(inode_item));
2192         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2193         if (root_dir)
2194                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2195         else
2196                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2197         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2198         if (rec->found_dir_item) {
2199                 if (rec->found_file_extent)
2200                         fprintf(stderr, "root %llu inode %llu has both a dir "
2201                                 "item and extents, unsure if it is a dir or a "
2202                                 "regular file so setting it as a directory\n",
2203                                 (unsigned long long)root->objectid,
2204                                 (unsigned long long)rec->ino);
2205                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2206                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2207         } else if (!rec->found_dir_item) {
2208                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2209                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2210         }
2211         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2212         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2213         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2214         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2215         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2216         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2217         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2218         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2219
2220         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2221         BUG_ON(ret);
2222         btrfs_commit_transaction(trans, root);
2223         return 0;
2224 }
2225
2226 static int repair_inode_backrefs(struct btrfs_root *root,
2227                                  struct inode_record *rec,
2228                                  struct cache_tree *inode_cache,
2229                                  int delete)
2230 {
2231         struct inode_backref *tmp, *backref;
2232         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2233         int ret = 0;
2234         int repaired = 0;
2235
2236         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2237                 if (!delete && rec->ino == root_dirid) {
2238                         if (!rec->found_inode_item) {
2239                                 ret = create_inode_item(root, rec, backref, 1);
2240                                 if (ret)
2241                                         break;
2242                                 repaired++;
2243                         }
2244                 }
2245
2246                 /* Index 0 for root dir's are special, don't mess with it */
2247                 if (rec->ino == root_dirid && backref->index == 0)
2248                         continue;
2249
2250                 if (delete &&
2251                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2252                      (backref->found_dir_index && backref->found_inode_ref &&
2253                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2254                         ret = delete_dir_index(root, inode_cache, rec, backref);
2255                         if (ret)
2256                                 break;
2257                         repaired++;
2258                         list_del(&backref->list);
2259                         free(backref);
2260                 }
2261
2262                 if (!delete && !backref->found_dir_index &&
2263                     backref->found_dir_item && backref->found_inode_ref) {
2264                         ret = add_missing_dir_index(root, inode_cache, rec,
2265                                                     backref);
2266                         if (ret)
2267                                 break;
2268                         repaired++;
2269                         if (backref->found_dir_item &&
2270                             backref->found_dir_index &&
2271                             backref->found_dir_index) {
2272                                 if (!backref->errors &&
2273                                     backref->found_inode_ref) {
2274                                         list_del(&backref->list);
2275                                         free(backref);
2276                                 }
2277                         }
2278                 }
2279
2280                 if (!delete && (!backref->found_dir_index &&
2281                                 !backref->found_dir_item &&
2282                                 backref->found_inode_ref)) {
2283                         struct btrfs_trans_handle *trans;
2284                         struct btrfs_key location;
2285
2286                         ret = check_dir_conflict(root, backref->name,
2287                                                  backref->namelen,
2288                                                  backref->dir,
2289                                                  backref->index);
2290                         if (ret) {
2291                                 /*
2292                                  * let nlink fixing routine to handle it,
2293                                  * which can do it better.
2294                                  */
2295                                 ret = 0;
2296                                 break;
2297                         }
2298                         location.objectid = rec->ino;
2299                         location.type = BTRFS_INODE_ITEM_KEY;
2300                         location.offset = 0;
2301
2302                         trans = btrfs_start_transaction(root, 1);
2303                         if (IS_ERR(trans)) {
2304                                 ret = PTR_ERR(trans);
2305                                 break;
2306                         }
2307                         fprintf(stderr, "adding missing dir index/item pair "
2308                                 "for inode %llu\n",
2309                                 (unsigned long long)rec->ino);
2310                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2311                                                     backref->namelen,
2312                                                     backref->dir, &location,
2313                                                     imode_to_type(rec->imode),
2314                                                     backref->index);
2315                         BUG_ON(ret);
2316                         btrfs_commit_transaction(trans, root);
2317                         repaired++;
2318                 }
2319
2320                 if (!delete && (backref->found_inode_ref &&
2321                                 backref->found_dir_index &&
2322                                 backref->found_dir_item &&
2323                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2324                                 !rec->found_inode_item)) {
2325                         ret = create_inode_item(root, rec, backref, 0);
2326                         if (ret)
2327                                 break;
2328                         repaired++;
2329                 }
2330
2331         }
2332         return ret ? ret : repaired;
2333 }
2334
2335 /*
2336  * To determine the file type for nlink/inode_item repair
2337  *
2338  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2339  * Return -ENOENT if file type is not found.
2340  */
2341 static int find_file_type(struct inode_record *rec, u8 *type)
2342 {
2343         struct inode_backref *backref;
2344
2345         /* For inode item recovered case */
2346         if (rec->found_inode_item) {
2347                 *type = imode_to_type(rec->imode);
2348                 return 0;
2349         }
2350
2351         list_for_each_entry(backref, &rec->backrefs, list) {
2352                 if (backref->found_dir_index || backref->found_dir_item) {
2353                         *type = backref->filetype;
2354                         return 0;
2355                 }
2356         }
2357         return -ENOENT;
2358 }
2359
2360 /*
2361  * To determine the file name for nlink repair
2362  *
2363  * Return 0 if file name is found, set name and namelen.
2364  * Return -ENOENT if file name is not found.
2365  */
2366 static int find_file_name(struct inode_record *rec,
2367                           char *name, int *namelen)
2368 {
2369         struct inode_backref *backref;
2370
2371         list_for_each_entry(backref, &rec->backrefs, list) {
2372                 if (backref->found_dir_index || backref->found_dir_item ||
2373                     backref->found_inode_ref) {
2374                         memcpy(name, backref->name, backref->namelen);
2375                         *namelen = backref->namelen;
2376                         return 0;
2377                 }
2378         }
2379         return -ENOENT;
2380 }
2381
2382 /* Reset the nlink of the inode to the correct one */
2383 static int reset_nlink(struct btrfs_trans_handle *trans,
2384                        struct btrfs_root *root,
2385                        struct btrfs_path *path,
2386                        struct inode_record *rec)
2387 {
2388         struct inode_backref *backref;
2389         struct inode_backref *tmp;
2390         struct btrfs_key key;
2391         struct btrfs_inode_item *inode_item;
2392         int ret = 0;
2393
2394         /* We don't believe this either, reset it and iterate backref */
2395         rec->found_link = 0;
2396
2397         /* Remove all backref including the valid ones */
2398         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2399                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2400                                    backref->index, backref->name,
2401                                    backref->namelen, 0);
2402                 if (ret < 0)
2403                         goto out;
2404
2405                 /* remove invalid backref, so it won't be added back */
2406                 if (!(backref->found_dir_index &&
2407                       backref->found_dir_item &&
2408                       backref->found_inode_ref)) {
2409                         list_del(&backref->list);
2410                         free(backref);
2411                 } else {
2412                         rec->found_link++;
2413                 }
2414         }
2415
2416         /* Set nlink to 0 */
2417         key.objectid = rec->ino;
2418         key.type = BTRFS_INODE_ITEM_KEY;
2419         key.offset = 0;
2420         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2421         if (ret < 0)
2422                 goto out;
2423         if (ret > 0) {
2424                 ret = -ENOENT;
2425                 goto out;
2426         }
2427         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2428                                     struct btrfs_inode_item);
2429         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2430         btrfs_mark_buffer_dirty(path->nodes[0]);
2431         btrfs_release_path(path);
2432
2433         /*
2434          * Add back valid inode_ref/dir_item/dir_index,
2435          * add_link() will handle the nlink inc, so new nlink must be correct
2436          */
2437         list_for_each_entry(backref, &rec->backrefs, list) {
2438                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2439                                      backref->name, backref->namelen,
2440                                      backref->filetype, &backref->index, 1);
2441                 if (ret < 0)
2442                         goto out;
2443         }
2444 out:
2445         btrfs_release_path(path);
2446         return ret;
2447 }
2448
2449 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2450                                struct btrfs_root *root,
2451                                struct btrfs_path *path,
2452                                struct inode_record *rec)
2453 {
2454         char *dir_name = "lost+found";
2455         char namebuf[BTRFS_NAME_LEN] = {0};
2456         u64 lost_found_ino;
2457         u32 mode = 0700;
2458         u8 type = 0;
2459         int namelen = 0;
2460         int name_recovered = 0;
2461         int type_recovered = 0;
2462         int ret = 0;
2463
2464         /*
2465          * Get file name and type first before these invalid inode ref
2466          * are deleted by remove_all_invalid_backref()
2467          */
2468         name_recovered = !find_file_name(rec, namebuf, &namelen);
2469         type_recovered = !find_file_type(rec, &type);
2470
2471         if (!name_recovered) {
2472                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2473                        rec->ino, rec->ino);
2474                 namelen = count_digits(rec->ino);
2475                 sprintf(namebuf, "%llu", rec->ino);
2476                 name_recovered = 1;
2477         }
2478         if (!type_recovered) {
2479                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2480                        rec->ino);
2481                 type = BTRFS_FT_REG_FILE;
2482                 type_recovered = 1;
2483         }
2484
2485         ret = reset_nlink(trans, root, path, rec);
2486         if (ret < 0) {
2487                 fprintf(stderr,
2488                         "Failed to reset nlink for inode %llu: %s\n",
2489                         rec->ino, strerror(-ret));
2490                 goto out;
2491         }
2492
2493         if (rec->found_link == 0) {
2494                 lost_found_ino = root->highest_inode;
2495                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2496                         ret = -EOVERFLOW;
2497                         goto out;
2498                 }
2499                 lost_found_ino++;
2500                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2501                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2502                                   mode);
2503                 if (ret < 0) {
2504                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2505                                 dir_name, strerror(-ret));
2506                         goto out;
2507                 }
2508                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2509                                      namebuf, namelen, type, NULL, 1);
2510                 /*
2511                  * Add ".INO" suffix several times to handle case where
2512                  * "FILENAME.INO" is already taken by another file.
2513                  */
2514                 while (ret == -EEXIST) {
2515                         /*
2516                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2517                          */
2518                         if (namelen + count_digits(rec->ino) + 1 >
2519                             BTRFS_NAME_LEN) {
2520                                 ret = -EFBIG;
2521                                 goto out;
2522                         }
2523                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2524                                  ".%llu", rec->ino);
2525                         namelen += count_digits(rec->ino) + 1;
2526                         ret = btrfs_add_link(trans, root, rec->ino,
2527                                              lost_found_ino, namebuf,
2528                                              namelen, type, NULL, 1);
2529                 }
2530                 if (ret < 0) {
2531                         fprintf(stderr,
2532                                 "Failed to link the inode %llu to %s dir: %s\n",
2533                                 rec->ino, dir_name, strerror(-ret));
2534                         goto out;
2535                 }
2536                 /*
2537                  * Just increase the found_link, don't actually add the
2538                  * backref. This will make things easier and this inode
2539                  * record will be freed after the repair is done.
2540                  * So fsck will not report problem about this inode.
2541                  */
2542                 rec->found_link++;
2543                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2544                        namelen, namebuf, dir_name);
2545         }
2546         printf("Fixed the nlink of inode %llu\n", rec->ino);
2547 out:
2548         /*
2549          * Clear the flag anyway, or we will loop forever for the same inode
2550          * as it will not be removed from the bad inode list and the dead loop
2551          * happens.
2552          */
2553         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2554         btrfs_release_path(path);
2555         return ret;
2556 }
2557
2558 /*
2559  * Check if there is any normal(reg or prealloc) file extent for given
2560  * ino.
2561  * This is used to determine the file type when neither its dir_index/item or
2562  * inode_item exists.
2563  *
2564  * This will *NOT* report error, if any error happens, just consider it does
2565  * not have any normal file extent.
2566  */
2567 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2568 {
2569         struct btrfs_path *path;
2570         struct btrfs_key key;
2571         struct btrfs_key found_key;
2572         struct btrfs_file_extent_item *fi;
2573         u8 type;
2574         int ret = 0;
2575
2576         path = btrfs_alloc_path();
2577         if (!path)
2578                 goto out;
2579         key.objectid = ino;
2580         key.type = BTRFS_EXTENT_DATA_KEY;
2581         key.offset = 0;
2582
2583         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2584         if (ret < 0) {
2585                 ret = 0;
2586                 goto out;
2587         }
2588         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2589                 ret = btrfs_next_leaf(root, path);
2590                 if (ret) {
2591                         ret = 0;
2592                         goto out;
2593                 }
2594         }
2595         while (1) {
2596                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2597                                       path->slots[0]);
2598                 if (found_key.objectid != ino ||
2599                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2600                         break;
2601                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2602                                     struct btrfs_file_extent_item);
2603                 type = btrfs_file_extent_type(path->nodes[0], fi);
2604                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2605                         ret = 1;
2606                         goto out;
2607                 }
2608         }
2609 out:
2610         btrfs_free_path(path);
2611         return ret;
2612 }
2613
2614 static u32 btrfs_type_to_imode(u8 type)
2615 {
2616         static u32 imode_by_btrfs_type[] = {
2617                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2618                 [BTRFS_FT_DIR]          = S_IFDIR,
2619                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2620                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2621                 [BTRFS_FT_FIFO]         = S_IFIFO,
2622                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2623                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2624         };
2625
2626         return imode_by_btrfs_type[(type)];
2627 }
2628
2629 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2630                                 struct btrfs_root *root,
2631                                 struct btrfs_path *path,
2632                                 struct inode_record *rec)
2633 {
2634         u8 filetype;
2635         u32 mode = 0700;
2636         int type_recovered = 0;
2637         int ret = 0;
2638
2639         printf("Trying to rebuild inode:%llu\n", rec->ino);
2640
2641         type_recovered = !find_file_type(rec, &filetype);
2642
2643         /*
2644          * Try to determine inode type if type not found.
2645          *
2646          * For found regular file extent, it must be FILE.
2647          * For found dir_item/index, it must be DIR.
2648          *
2649          * For undetermined one, use FILE as fallback.
2650          *
2651          * TODO:
2652          * 1. If found backref(inode_index/item is already handled) to it,
2653          *    it must be DIR.
2654          *    Need new inode-inode ref structure to allow search for that.
2655          */
2656         if (!type_recovered) {
2657                 if (rec->found_file_extent &&
2658                     find_normal_file_extent(root, rec->ino)) {
2659                         type_recovered = 1;
2660                         filetype = BTRFS_FT_REG_FILE;
2661                 } else if (rec->found_dir_item) {
2662                         type_recovered = 1;
2663                         filetype = BTRFS_FT_DIR;
2664                 } else if (!list_empty(&rec->orphan_extents)) {
2665                         type_recovered = 1;
2666                         filetype = BTRFS_FT_REG_FILE;
2667                 } else{
2668                         printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2669                                rec->ino);
2670                         type_recovered = 1;
2671                         filetype = BTRFS_FT_REG_FILE;
2672                 }
2673         }
2674
2675         ret = btrfs_new_inode(trans, root, rec->ino,
2676                               mode | btrfs_type_to_imode(filetype));
2677         if (ret < 0)
2678                 goto out;
2679
2680         /*
2681          * Here inode rebuild is done, we only rebuild the inode item,
2682          * don't repair the nlink(like move to lost+found).
2683          * That is the job of nlink repair.
2684          *
2685          * We just fill the record and return
2686          */
2687         rec->found_dir_item = 1;
2688         rec->imode = mode | btrfs_type_to_imode(filetype);
2689         rec->nlink = 0;
2690         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2691         /* Ensure the inode_nlinks repair function will be called */
2692         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2693 out:
2694         return ret;
2695 }
2696
2697 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2698                                       struct btrfs_root *root,
2699                                       struct btrfs_path *path,
2700                                       struct inode_record *rec)
2701 {
2702         struct orphan_data_extent *orphan;
2703         struct orphan_data_extent *tmp;
2704         int ret = 0;
2705
2706         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2707                 /*
2708                  * Check for conflicting file extents
2709                  *
2710                  * Here we don't know whether the extents is compressed or not,
2711                  * so we can only assume it not compressed nor data offset,
2712                  * and use its disk_len as extent length.
2713                  */
2714                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2715                                        orphan->offset, orphan->disk_len, 0);
2716                 btrfs_release_path(path);
2717                 if (ret < 0)
2718                         goto out;
2719                 if (!ret) {
2720                         fprintf(stderr,
2721                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2722                                 orphan->disk_bytenr, orphan->disk_len);
2723                         ret = btrfs_free_extent(trans,
2724                                         root->fs_info->extent_root,
2725                                         orphan->disk_bytenr, orphan->disk_len,
2726                                         0, root->objectid, orphan->objectid,
2727                                         orphan->offset);
2728                         if (ret < 0)
2729                                 goto out;
2730                 }
2731                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2732                                 orphan->offset, orphan->disk_bytenr,
2733                                 orphan->disk_len, orphan->disk_len);
2734                 if (ret < 0)
2735                         goto out;
2736
2737                 /* Update file size info */
2738                 rec->found_size += orphan->disk_len;
2739                 if (rec->found_size == rec->nbytes)
2740                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2741
2742                 /* Update the file extent hole info too */
2743                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2744                                            orphan->disk_len);
2745                 if (ret < 0)
2746                         goto out;
2747                 if (RB_EMPTY_ROOT(&rec->holes))
2748                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2749
2750                 list_del(&orphan->list);
2751                 free(orphan);
2752         }
2753         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2754 out:
2755         return ret;
2756 }
2757
2758 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2759                                         struct btrfs_root *root,
2760                                         struct btrfs_path *path,
2761                                         struct inode_record *rec)
2762 {
2763         struct rb_node *node;
2764         struct file_extent_hole *hole;
2765         int found = 0;
2766         int ret = 0;
2767
2768         node = rb_first(&rec->holes);
2769
2770         while (node) {
2771                 found = 1;
2772                 hole = rb_entry(node, struct file_extent_hole, node);
2773                 ret = btrfs_punch_hole(trans, root, rec->ino,
2774                                        hole->start, hole->len);
2775                 if (ret < 0)
2776                         goto out;
2777                 ret = del_file_extent_hole(&rec->holes, hole->start,
2778                                            hole->len);
2779                 if (ret < 0)
2780                         goto out;
2781                 if (RB_EMPTY_ROOT(&rec->holes))
2782                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2783                 node = rb_first(&rec->holes);
2784         }
2785         /* special case for a file losing all its file extent */
2786         if (!found) {
2787                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2788                                        round_up(rec->isize, root->sectorsize));
2789                 if (ret < 0)
2790                         goto out;
2791         }
2792         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2793                rec->ino, root->objectid);
2794 out:
2795         return ret;
2796 }
2797
2798 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2799 {
2800         struct btrfs_trans_handle *trans;
2801         struct btrfs_path *path;
2802         int ret = 0;
2803
2804         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2805                              I_ERR_NO_ORPHAN_ITEM |
2806                              I_ERR_LINK_COUNT_WRONG |
2807                              I_ERR_NO_INODE_ITEM |
2808                              I_ERR_FILE_EXTENT_ORPHAN |
2809                              I_ERR_FILE_EXTENT_DISCOUNT|
2810                              I_ERR_FILE_NBYTES_WRONG)))
2811                 return rec->errors;
2812
2813         path = btrfs_alloc_path();
2814         if (!path)
2815                 return -ENOMEM;
2816
2817         /*
2818          * For nlink repair, it may create a dir and add link, so
2819          * 2 for parent(256)'s dir_index and dir_item
2820          * 2 for lost+found dir's inode_item and inode_ref
2821          * 1 for the new inode_ref of the file
2822          * 2 for lost+found dir's dir_index and dir_item for the file
2823          */
2824         trans = btrfs_start_transaction(root, 7);
2825         if (IS_ERR(trans)) {
2826                 btrfs_free_path(path);
2827                 return PTR_ERR(trans);
2828         }
2829
2830         if (rec->errors & I_ERR_NO_INODE_ITEM)
2831                 ret = repair_inode_no_item(trans, root, path, rec);
2832         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2833                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2834         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2835                 ret = repair_inode_discount_extent(trans, root, path, rec);
2836         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2837                 ret = repair_inode_isize(trans, root, path, rec);
2838         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2839                 ret = repair_inode_orphan_item(trans, root, path, rec);
2840         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2841                 ret = repair_inode_nlinks(trans, root, path, rec);
2842         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2843                 ret = repair_inode_nbytes(trans, root, path, rec);
2844         btrfs_commit_transaction(trans, root);
2845         btrfs_free_path(path);
2846         return ret;
2847 }
2848
2849 static int check_inode_recs(struct btrfs_root *root,
2850                             struct cache_tree *inode_cache)
2851 {
2852         struct cache_extent *cache;
2853         struct ptr_node *node;
2854         struct inode_record *rec;
2855         struct inode_backref *backref;
2856         int stage = 0;
2857         int ret = 0;
2858         int err = 0;
2859         u64 error = 0;
2860         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2861
2862         if (btrfs_root_refs(&root->root_item) == 0) {
2863                 if (!cache_tree_empty(inode_cache))
2864                         fprintf(stderr, "warning line %d\n", __LINE__);
2865                 return 0;
2866         }
2867
2868         /*
2869          * We need to record the highest inode number for later 'lost+found'
2870          * dir creation.
2871          * We must select a ino not used/refered by any existing inode, or
2872          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2873          * this may cause 'lost+found' dir has wrong nlinks.
2874          */
2875         cache = last_cache_extent(inode_cache);
2876         if (cache) {
2877                 node = container_of(cache, struct ptr_node, cache);
2878                 rec = node->data;
2879                 if (rec->ino > root->highest_inode)
2880                         root->highest_inode = rec->ino;
2881         }
2882
2883         /*
2884          * We need to repair backrefs first because we could change some of the
2885          * errors in the inode recs.
2886          *
2887          * We also need to go through and delete invalid backrefs first and then
2888          * add the correct ones second.  We do this because we may get EEXIST
2889          * when adding back the correct index because we hadn't yet deleted the
2890          * invalid index.
2891          *
2892          * For example, if we were missing a dir index then the directories
2893          * isize would be wrong, so if we fixed the isize to what we thought it
2894          * would be and then fixed the backref we'd still have a invalid fs, so
2895          * we need to add back the dir index and then check to see if the isize
2896          * is still wrong.
2897          */
2898         while (stage < 3) {
2899                 stage++;
2900                 if (stage == 3 && !err)
2901                         break;
2902
2903                 cache = search_cache_extent(inode_cache, 0);
2904                 while (repair && cache) {
2905                         node = container_of(cache, struct ptr_node, cache);
2906                         rec = node->data;
2907                         cache = next_cache_extent(cache);
2908
2909                         /* Need to free everything up and rescan */
2910                         if (stage == 3) {
2911                                 remove_cache_extent(inode_cache, &node->cache);
2912                                 free(node);
2913                                 free_inode_rec(rec);
2914                                 continue;
2915                         }
2916
2917                         if (list_empty(&rec->backrefs))
2918                                 continue;
2919
2920                         ret = repair_inode_backrefs(root, rec, inode_cache,
2921                                                     stage == 1);
2922                         if (ret < 0) {
2923                                 err = ret;
2924                                 stage = 2;
2925                                 break;
2926                         } if (ret > 0) {
2927                                 err = -EAGAIN;
2928                         }
2929                 }
2930         }
2931         if (err)
2932                 return err;
2933
2934         rec = get_inode_rec(inode_cache, root_dirid, 0);
2935         BUG_ON(IS_ERR(rec));
2936         if (rec) {
2937                 ret = check_root_dir(rec);
2938                 if (ret) {
2939                         fprintf(stderr, "root %llu root dir %llu error\n",
2940                                 (unsigned long long)root->root_key.objectid,
2941                                 (unsigned long long)root_dirid);
2942                         print_inode_error(root, rec);
2943                         error++;
2944                 }
2945         } else {
2946                 if (repair) {
2947                         struct btrfs_trans_handle *trans;
2948
2949                         trans = btrfs_start_transaction(root, 1);
2950                         if (IS_ERR(trans)) {
2951                                 err = PTR_ERR(trans);
2952                                 return err;
2953                         }
2954
2955                         fprintf(stderr,
2956                                 "root %llu missing its root dir, recreating\n",
2957                                 (unsigned long long)root->objectid);
2958
2959                         ret = btrfs_make_root_dir(trans, root, root_dirid);
2960                         BUG_ON(ret);
2961
2962                         btrfs_commit_transaction(trans, root);
2963                         return -EAGAIN;
2964                 }
2965
2966                 fprintf(stderr, "root %llu root dir %llu not found\n",
2967                         (unsigned long long)root->root_key.objectid,
2968                         (unsigned long long)root_dirid);
2969         }
2970
2971         while (1) {
2972                 cache = search_cache_extent(inode_cache, 0);
2973                 if (!cache)
2974                         break;
2975                 node = container_of(cache, struct ptr_node, cache);
2976                 rec = node->data;
2977                 remove_cache_extent(inode_cache, &node->cache);
2978                 free(node);
2979                 if (rec->ino == root_dirid ||
2980                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
2981                         free_inode_rec(rec);
2982                         continue;
2983                 }
2984
2985                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2986                         ret = check_orphan_item(root, rec->ino);
2987                         if (ret == 0)
2988                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2989                         if (can_free_inode_rec(rec)) {
2990                                 free_inode_rec(rec);
2991                                 continue;
2992                         }
2993                 }
2994
2995                 if (!rec->found_inode_item)
2996                         rec->errors |= I_ERR_NO_INODE_ITEM;
2997                 if (rec->found_link != rec->nlink)
2998                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2999                 if (repair) {
3000                         ret = try_repair_inode(root, rec);
3001                         if (ret == 0 && can_free_inode_rec(rec)) {
3002                                 free_inode_rec(rec);
3003                                 continue;
3004                         }
3005                         ret = 0;
3006                 }
3007
3008                 if (!(repair && ret == 0))
3009                         error++;
3010                 print_inode_error(root, rec);
3011                 list_for_each_entry(backref, &rec->backrefs, list) {
3012                         if (!backref->found_dir_item)
3013                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3014                         if (!backref->found_dir_index)
3015                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3016                         if (!backref->found_inode_ref)
3017                                 backref->errors |= REF_ERR_NO_INODE_REF;
3018                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3019                                 " namelen %u name %s filetype %d errors %x",
3020                                 (unsigned long long)backref->dir,
3021                                 (unsigned long long)backref->index,
3022                                 backref->namelen, backref->name,
3023                                 backref->filetype, backref->errors);
3024                         print_ref_error(backref->errors);
3025                 }
3026                 free_inode_rec(rec);
3027         }
3028         return (error > 0) ? -1 : 0;
3029 }
3030
3031 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3032                                         u64 objectid)
3033 {
3034         struct cache_extent *cache;
3035         struct root_record *rec = NULL;
3036         int ret;
3037
3038         cache = lookup_cache_extent(root_cache, objectid, 1);
3039         if (cache) {
3040                 rec = container_of(cache, struct root_record, cache);
3041         } else {
3042                 rec = calloc(1, sizeof(*rec));
3043                 rec->objectid = objectid;
3044                 INIT_LIST_HEAD(&rec->backrefs);
3045                 rec->cache.start = objectid;
3046                 rec->cache.size = 1;
3047
3048                 ret = insert_cache_extent(root_cache, &rec->cache);
3049                 BUG_ON(ret);
3050         }
3051         return rec;
3052 }
3053
3054 static struct root_backref *get_root_backref(struct root_record *rec,
3055                                              u64 ref_root, u64 dir, u64 index,
3056                                              const char *name, int namelen)
3057 {
3058         struct root_backref *backref;
3059
3060         list_for_each_entry(backref, &rec->backrefs, list) {
3061                 if (backref->ref_root != ref_root || backref->dir != dir ||
3062                     backref->namelen != namelen)
3063                         continue;
3064                 if (memcmp(name, backref->name, namelen))
3065                         continue;
3066                 return backref;
3067         }
3068
3069         backref = calloc(1, sizeof(*backref) + namelen + 1);
3070         backref->ref_root = ref_root;
3071         backref->dir = dir;
3072         backref->index = index;
3073         backref->namelen = namelen;
3074         memcpy(backref->name, name, namelen);
3075         backref->name[namelen] = '\0';
3076         list_add_tail(&backref->list, &rec->backrefs);
3077         return backref;
3078 }
3079
3080 static void free_root_record(struct cache_extent *cache)
3081 {
3082         struct root_record *rec;
3083         struct root_backref *backref;
3084
3085         rec = container_of(cache, struct root_record, cache);
3086         while (!list_empty(&rec->backrefs)) {
3087                 backref = list_entry(rec->backrefs.next,
3088                                      struct root_backref, list);
3089                 list_del(&backref->list);
3090                 free(backref);
3091         }
3092
3093         kfree(rec);
3094 }
3095
3096 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3097
3098 static int add_root_backref(struct cache_tree *root_cache,
3099                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3100                             const char *name, int namelen,
3101                             int item_type, int errors)
3102 {
3103         struct root_record *rec;
3104         struct root_backref *backref;
3105
3106         rec = get_root_rec(root_cache, root_id);
3107         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3108
3109         backref->errors |= errors;
3110
3111         if (item_type != BTRFS_DIR_ITEM_KEY) {
3112                 if (backref->found_dir_index || backref->found_back_ref ||
3113                     backref->found_forward_ref) {
3114                         if (backref->index != index)
3115                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3116                 } else {
3117                         backref->index = index;
3118                 }
3119         }
3120
3121         if (item_type == BTRFS_DIR_ITEM_KEY) {
3122                 if (backref->found_forward_ref)
3123                         rec->found_ref++;
3124                 backref->found_dir_item = 1;
3125         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3126                 backref->found_dir_index = 1;
3127         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3128                 if (backref->found_forward_ref)
3129                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3130                 else if (backref->found_dir_item)
3131                         rec->found_ref++;
3132                 backref->found_forward_ref = 1;
3133         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3134                 if (backref->found_back_ref)
3135                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3136                 backref->found_back_ref = 1;
3137         } else {
3138                 BUG_ON(1);
3139         }
3140
3141         if (backref->found_forward_ref && backref->found_dir_item)
3142                 backref->reachable = 1;
3143         return 0;
3144 }
3145
3146 static int merge_root_recs(struct btrfs_root *root,
3147                            struct cache_tree *src_cache,
3148                            struct cache_tree *dst_cache)
3149 {
3150         struct cache_extent *cache;
3151         struct ptr_node *node;
3152         struct inode_record *rec;
3153         struct inode_backref *backref;
3154         int ret = 0;
3155
3156         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3157                 free_inode_recs_tree(src_cache);
3158                 return 0;
3159         }
3160
3161         while (1) {
3162                 cache = search_cache_extent(src_cache, 0);
3163                 if (!cache)
3164                         break;
3165                 node = container_of(cache, struct ptr_node, cache);
3166                 rec = node->data;
3167                 remove_cache_extent(src_cache, &node->cache);
3168                 free(node);
3169
3170                 ret = is_child_root(root, root->objectid, rec->ino);
3171                 if (ret < 0)
3172                         break;
3173                 else if (ret == 0)
3174                         goto skip;
3175
3176                 list_for_each_entry(backref, &rec->backrefs, list) {
3177                         BUG_ON(backref->found_inode_ref);
3178                         if (backref->found_dir_item)
3179                                 add_root_backref(dst_cache, rec->ino,
3180                                         root->root_key.objectid, backref->dir,
3181                                         backref->index, backref->name,
3182                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3183                                         backref->errors);
3184                         if (backref->found_dir_index)
3185                                 add_root_backref(dst_cache, rec->ino,
3186                                         root->root_key.objectid, backref->dir,
3187                                         backref->index, backref->name,
3188                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3189                                         backref->errors);
3190                 }
3191 skip:
3192                 free_inode_rec(rec);
3193         }
3194         if (ret < 0)
3195                 return ret;
3196         return 0;
3197 }
3198
3199 static int check_root_refs(struct btrfs_root *root,
3200                            struct cache_tree *root_cache)
3201 {
3202         struct root_record *rec;
3203         struct root_record *ref_root;
3204         struct root_backref *backref;
3205         struct cache_extent *cache;
3206         int loop = 1;
3207         int ret;
3208         int error;
3209         int errors = 0;
3210
3211         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3212         rec->found_ref = 1;
3213
3214         /* fixme: this can not detect circular references */
3215         while (loop) {
3216                 loop = 0;
3217                 cache = search_cache_extent(root_cache, 0);
3218                 while (1) {
3219                         if (!cache)
3220                                 break;
3221                         rec = container_of(cache, struct root_record, cache);
3222                         cache = next_cache_extent(cache);
3223
3224                         if (rec->found_ref == 0)
3225                                 continue;
3226
3227                         list_for_each_entry(backref, &rec->backrefs, list) {
3228                                 if (!backref->reachable)
3229                                         continue;
3230
3231                                 ref_root = get_root_rec(root_cache,
3232                                                         backref->ref_root);
3233                                 if (ref_root->found_ref > 0)
3234                                         continue;
3235
3236                                 backref->reachable = 0;
3237                                 rec->found_ref--;
3238                                 if (rec->found_ref == 0)
3239                                         loop = 1;
3240                         }
3241                 }
3242         }
3243
3244         cache = search_cache_extent(root_cache, 0);
3245         while (1) {
3246                 if (!cache)
3247                         break;
3248                 rec = container_of(cache, struct root_record, cache);
3249                 cache = next_cache_extent(cache);
3250
3251                 if (rec->found_ref == 0 &&
3252                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3253                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3254                         ret = check_orphan_item(root->fs_info->tree_root,
3255                                                 rec->objectid);
3256                         if (ret == 0)
3257                                 continue;
3258
3259                         /*
3260                          * If we don't have a root item then we likely just have
3261                          * a dir item in a snapshot for this root but no actual
3262                          * ref key or anything so it's meaningless.
3263                          */
3264                         if (!rec->found_root_item)
3265                                 continue;
3266                         errors++;
3267                         fprintf(stderr, "fs tree %llu not referenced\n",
3268                                 (unsigned long long)rec->objectid);
3269                 }
3270
3271                 error = 0;
3272                 if (rec->found_ref > 0 && !rec->found_root_item)
3273                         error = 1;
3274                 list_for_each_entry(backref, &rec->backrefs, list) {
3275                         if (!backref->found_dir_item)
3276                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3277                         if (!backref->found_dir_index)
3278                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3279                         if (!backref->found_back_ref)
3280                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3281                         if (!backref->found_forward_ref)
3282                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3283                         if (backref->reachable && backref->errors)
3284                                 error = 1;
3285                 }
3286                 if (!error)
3287                         continue;
3288
3289                 errors++;
3290                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3291                         (unsigned long long)rec->objectid, rec->found_ref,
3292                          rec->found_root_item ? "" : "not found");
3293
3294                 list_for_each_entry(backref, &rec->backrefs, list) {
3295                         if (!backref->reachable)
3296                                 continue;
3297                         if (!backref->errors && rec->found_root_item)
3298                                 continue;
3299                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3300                                 " index %llu namelen %u name %s errors %x\n",
3301                                 (unsigned long long)backref->ref_root,
3302                                 (unsigned long long)backref->dir,
3303                                 (unsigned long long)backref->index,
3304                                 backref->namelen, backref->name,
3305                                 backref->errors);
3306                         print_ref_error(backref->errors);
3307                 }
3308         }
3309         return errors > 0 ? 1 : 0;
3310 }
3311
3312 static int process_root_ref(struct extent_buffer *eb, int slot,
3313                             struct btrfs_key *key,
3314                             struct cache_tree *root_cache)
3315 {
3316         u64 dirid;
3317         u64 index;
3318         u32 len;
3319         u32 name_len;
3320         struct btrfs_root_ref *ref;
3321         char namebuf[BTRFS_NAME_LEN];
3322         int error;
3323
3324         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3325
3326         dirid = btrfs_root_ref_dirid(eb, ref);
3327         index = btrfs_root_ref_sequence(eb, ref);
3328         name_len = btrfs_root_ref_name_len(eb, ref);
3329
3330         if (name_len <= BTRFS_NAME_LEN) {
3331                 len = name_len;
3332                 error = 0;
3333         } else {
3334                 len = BTRFS_NAME_LEN;
3335                 error = REF_ERR_NAME_TOO_LONG;
3336         }
3337         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3338
3339         if (key->type == BTRFS_ROOT_REF_KEY) {
3340                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3341                                  index, namebuf, len, key->type, error);
3342         } else {
3343                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3344                                  index, namebuf, len, key->type, error);
3345         }
3346         return 0;
3347 }
3348
3349 static void free_corrupt_block(struct cache_extent *cache)
3350 {
3351         struct btrfs_corrupt_block *corrupt;
3352
3353         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3354         free(corrupt);
3355 }
3356
3357 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3358
3359 /*
3360  * Repair the btree of the given root.
3361  *
3362  * The fix is to remove the node key in corrupt_blocks cache_tree.
3363  * and rebalance the tree.
3364  * After the fix, the btree should be writeable.
3365  */
3366 static int repair_btree(struct btrfs_root *root,
3367                         struct cache_tree *corrupt_blocks)
3368 {
3369         struct btrfs_trans_handle *trans;
3370         struct btrfs_path *path;
3371         struct btrfs_corrupt_block *corrupt;
3372         struct cache_extent *cache;
3373         struct btrfs_key key;
3374         u64 offset;
3375         int level;
3376         int ret = 0;
3377
3378         if (cache_tree_empty(corrupt_blocks))
3379                 return 0;
3380
3381         path = btrfs_alloc_path();
3382         if (!path)
3383                 return -ENOMEM;
3384
3385         trans = btrfs_start_transaction(root, 1);
3386         if (IS_ERR(trans)) {
3387                 ret = PTR_ERR(trans);
3388                 fprintf(stderr, "Error starting transaction: %s\n",
3389                         strerror(-ret));
3390                 goto out_free_path;
3391         }
3392         cache = first_cache_extent(corrupt_blocks);
3393         while (cache) {
3394                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3395                                        cache);
3396                 level = corrupt->level;
3397                 path->lowest_level = level;
3398                 key.objectid = corrupt->key.objectid;
3399                 key.type = corrupt->key.type;
3400                 key.offset = corrupt->key.offset;
3401
3402                 /*
3403                  * Here we don't want to do any tree balance, since it may
3404                  * cause a balance with corrupted brother leaf/node,
3405                  * so ins_len set to 0 here.
3406                  * Balance will be done after all corrupt node/leaf is deleted.
3407                  */
3408                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3409                 if (ret < 0)
3410                         goto out;
3411                 offset = btrfs_node_blockptr(path->nodes[level],
3412                                              path->slots[level]);
3413
3414                 /* Remove the ptr */
3415                 ret = btrfs_del_ptr(trans, root, path, level,
3416                                     path->slots[level]);
3417                 if (ret < 0)
3418                         goto out;
3419                 /*
3420                  * Remove the corresponding extent
3421                  * return value is not concerned.
3422                  */
3423                 btrfs_release_path(path);
3424                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3425                                         0, root->root_key.objectid,
3426                                         level - 1, 0);
3427                 cache = next_cache_extent(cache);
3428         }
3429
3430         /* Balance the btree using btrfs_search_slot() */
3431         cache = first_cache_extent(corrupt_blocks);
3432         while (cache) {
3433                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3434                                        cache);
3435                 memcpy(&key, &corrupt->key, sizeof(key));
3436                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3437                 if (ret < 0)
3438                         goto out;
3439                 /* return will always >0 since it won't find the item */
3440                 ret = 0;
3441                 btrfs_release_path(path);
3442                 cache = next_cache_extent(cache);
3443         }
3444 out:
3445         btrfs_commit_transaction(trans, root);
3446 out_free_path:
3447         btrfs_free_path(path);
3448         return ret;
3449 }
3450
3451 static int check_fs_root(struct btrfs_root *root,
3452                          struct cache_tree *root_cache,
3453                          struct walk_control *wc)
3454 {
3455         int ret = 0;
3456         int err = 0;
3457         int wret;
3458         int level;
3459         struct btrfs_path path;
3460         struct shared_node root_node;
3461         struct root_record *rec;
3462         struct btrfs_root_item *root_item = &root->root_item;
3463         struct cache_tree corrupt_blocks;
3464         struct orphan_data_extent *orphan;
3465         struct orphan_data_extent *tmp;
3466         enum btrfs_tree_block_status status;
3467
3468         /*
3469          * Reuse the corrupt_block cache tree to record corrupted tree block
3470          *
3471          * Unlike the usage in extent tree check, here we do it in a per
3472          * fs/subvol tree base.
3473          */
3474         cache_tree_init(&corrupt_blocks);
3475         root->fs_info->corrupt_blocks = &corrupt_blocks;
3476
3477         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3478                 rec = get_root_rec(root_cache, root->root_key.objectid);
3479                 if (btrfs_root_refs(root_item) > 0)
3480                         rec->found_root_item = 1;
3481         }
3482
3483         btrfs_init_path(&path);
3484         memset(&root_node, 0, sizeof(root_node));
3485         cache_tree_init(&root_node.root_cache);
3486         cache_tree_init(&root_node.inode_cache);
3487
3488         /* Move the orphan extent record to corresponding inode_record */
3489         list_for_each_entry_safe(orphan, tmp,
3490                                  &root->orphan_data_extents, list) {
3491                 struct inode_record *inode;
3492
3493                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3494                                       1);
3495                 BUG_ON(IS_ERR(inode));
3496                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3497                 list_move(&orphan->list, &inode->orphan_extents);
3498         }
3499
3500         level = btrfs_header_level(root->node);
3501         memset(wc->nodes, 0, sizeof(wc->nodes));
3502         wc->nodes[level] = &root_node;
3503         wc->active_node = level;
3504         wc->root_level = level;
3505
3506         /* We may not have checked the root block, lets do that now */
3507         if (btrfs_is_leaf(root->node))
3508                 status = btrfs_check_leaf(root, NULL, root->node);
3509         else
3510                 status = btrfs_check_node(root, NULL, root->node);
3511         if (status != BTRFS_TREE_BLOCK_CLEAN)
3512                 return -EIO;
3513
3514         if (btrfs_root_refs(root_item) > 0 ||
3515             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3516                 path.nodes[level] = root->node;
3517                 extent_buffer_get(root->node);
3518                 path.slots[level] = 0;
3519         } else {
3520                 struct btrfs_key key;
3521                 struct btrfs_disk_key found_key;
3522
3523                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3524                 level = root_item->drop_level;
3525                 path.lowest_level = level;
3526                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3527                 if (wret < 0)
3528                         goto skip_walking;
3529                 btrfs_node_key(path.nodes[level], &found_key,
3530                                 path.slots[level]);
3531                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3532                                         sizeof(found_key)));
3533         }
3534
3535         while (1) {
3536                 wret = walk_down_tree(root, &path, wc, &level);
3537                 if (wret < 0)
3538                         ret = wret;
3539                 if (wret != 0)
3540                         break;
3541
3542                 wret = walk_up_tree(root, &path, wc, &level);
3543                 if (wret < 0)
3544                         ret = wret;
3545                 if (wret != 0)
3546                         break;
3547         }
3548 skip_walking:
3549         btrfs_release_path(&path);
3550
3551         if (!cache_tree_empty(&corrupt_blocks)) {
3552                 struct cache_extent *cache;
3553                 struct btrfs_corrupt_block *corrupt;
3554
3555                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3556                        root->root_key.objectid);
3557                 cache = first_cache_extent(&corrupt_blocks);
3558                 while (cache) {
3559                         corrupt = container_of(cache,
3560                                                struct btrfs_corrupt_block,
3561                                                cache);
3562                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3563                                cache->start, corrupt->level,
3564                                corrupt->key.objectid, corrupt->key.type,
3565                                corrupt->key.offset);
3566                         cache = next_cache_extent(cache);
3567                 }
3568                 if (repair) {
3569                         printf("Try to repair the btree for root %llu\n",
3570                                root->root_key.objectid);
3571                         ret = repair_btree(root, &corrupt_blocks);
3572                         if (ret < 0)
3573                                 fprintf(stderr, "Failed to repair btree: %s\n",
3574                                         strerror(-ret));
3575                         if (!ret)
3576                                 printf("Btree for root %llu is fixed\n",
3577                                        root->root_key.objectid);
3578                 }
3579         }
3580
3581         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3582         if (err < 0)
3583                 ret = err;
3584
3585         if (root_node.current) {
3586                 root_node.current->checked = 1;
3587                 maybe_free_inode_rec(&root_node.inode_cache,
3588                                 root_node.current);
3589         }
3590
3591         err = check_inode_recs(root, &root_node.inode_cache);
3592         if (!ret)
3593                 ret = err;
3594
3595         free_corrupt_blocks_tree(&corrupt_blocks);
3596         root->fs_info->corrupt_blocks = NULL;
3597         free_orphan_data_extents(&root->orphan_data_extents);
3598         return ret;
3599 }
3600
3601 static int fs_root_objectid(u64 objectid)
3602 {
3603         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3604             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3605                 return 1;
3606         return is_fstree(objectid);
3607 }
3608
3609 static int check_fs_roots(struct btrfs_root *root,
3610                           struct cache_tree *root_cache)
3611 {
3612         struct btrfs_path path;
3613         struct btrfs_key key;
3614         struct walk_control wc;
3615         struct extent_buffer *leaf, *tree_node;
3616         struct btrfs_root *tmp_root;
3617         struct btrfs_root *tree_root = root->fs_info->tree_root;
3618         int ret;
3619         int err = 0;
3620
3621         if (ctx.progress_enabled) {
3622                 ctx.tp = TASK_FS_ROOTS;
3623                 task_start(ctx.info);
3624         }
3625
3626         /*
3627          * Just in case we made any changes to the extent tree that weren't
3628          * reflected into the free space cache yet.
3629          */
3630         if (repair)
3631                 reset_cached_block_groups(root->fs_info);
3632         memset(&wc, 0, sizeof(wc));
3633         cache_tree_init(&wc.shared);
3634         btrfs_init_path(&path);
3635
3636 again:
3637         key.offset = 0;
3638         key.objectid = 0;
3639         key.type = BTRFS_ROOT_ITEM_KEY;
3640         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3641         if (ret < 0) {
3642                 err = 1;
3643                 goto out;
3644         }
3645         tree_node = tree_root->node;
3646         while (1) {
3647                 if (tree_node != tree_root->node) {
3648                         free_root_recs_tree(root_cache);
3649                         btrfs_release_path(&path);
3650                         goto again;
3651                 }
3652                 leaf = path.nodes[0];
3653                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3654                         ret = btrfs_next_leaf(tree_root, &path);
3655                         if (ret) {
3656                                 if (ret < 0)
3657                                         err = 1;
3658                                 break;
3659                         }
3660                         leaf = path.nodes[0];
3661                 }
3662                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3663                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3664                     fs_root_objectid(key.objectid)) {
3665                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3666                                 tmp_root = btrfs_read_fs_root_no_cache(
3667                                                 root->fs_info, &key);
3668                         } else {
3669                                 key.offset = (u64)-1;
3670                                 tmp_root = btrfs_read_fs_root(
3671                                                 root->fs_info, &key);
3672                         }
3673                         if (IS_ERR(tmp_root)) {
3674                                 err = 1;
3675                                 goto next;
3676                         }
3677                         ret = check_fs_root(tmp_root, root_cache, &wc);
3678                         if (ret == -EAGAIN) {
3679                                 free_root_recs_tree(root_cache);
3680                                 btrfs_release_path(&path);
3681                                 goto again;
3682                         }
3683                         if (ret)
3684                                 err = 1;
3685                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3686                                 btrfs_free_fs_root(tmp_root);
3687                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3688                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3689                         process_root_ref(leaf, path.slots[0], &key,
3690                                          root_cache);
3691                 }
3692 next:
3693                 path.slots[0]++;
3694         }
3695 out:
3696         btrfs_release_path(&path);
3697         if (err)
3698                 free_extent_cache_tree(&wc.shared);
3699         if (!cache_tree_empty(&wc.shared))
3700                 fprintf(stderr, "warning line %d\n", __LINE__);
3701
3702         task_stop(ctx.info);
3703
3704         return err;
3705 }
3706
3707 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3708 {
3709         struct list_head *cur = rec->backrefs.next;
3710         struct extent_backref *back;
3711         struct tree_backref *tback;
3712         struct data_backref *dback;
3713         u64 found = 0;
3714         int err = 0;
3715
3716         while(cur != &rec->backrefs) {
3717                 back = list_entry(cur, struct extent_backref, list);
3718                 cur = cur->next;
3719                 if (!back->found_extent_tree) {
3720                         err = 1;
3721                         if (!print_errs)
3722                                 goto out;
3723                         if (back->is_data) {
3724                                 dback = (struct data_backref *)back;
3725                                 fprintf(stderr, "Backref %llu %s %llu"
3726                                         " owner %llu offset %llu num_refs %lu"
3727                                         " not found in extent tree\n",
3728                                         (unsigned long long)rec->start,
3729                                         back->full_backref ?
3730                                         "parent" : "root",
3731                                         back->full_backref ?
3732                                         (unsigned long long)dback->parent:
3733                                         (unsigned long long)dback->root,
3734                                         (unsigned long long)dback->owner,
3735                                         (unsigned long long)dback->offset,
3736                                         (unsigned long)dback->num_refs);
3737                         } else {
3738                                 tback = (struct tree_backref *)back;
3739                                 fprintf(stderr, "Backref %llu parent %llu"
3740                                         " root %llu not found in extent tree\n",
3741                                         (unsigned long long)rec->start,
3742                                         (unsigned long long)tback->parent,
3743                                         (unsigned long long)tback->root);
3744                         }
3745                 }
3746                 if (!back->is_data && !back->found_ref) {
3747                         err = 1;
3748                         if (!print_errs)
3749                                 goto out;
3750                         tback = (struct tree_backref *)back;
3751                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3752                                 (unsigned long long)rec->start,
3753                                 back->full_backref ? "parent" : "root",
3754                                 back->full_backref ?
3755                                 (unsigned long long)tback->parent :
3756                                 (unsigned long long)tback->root, back);
3757                 }
3758                 if (back->is_data) {
3759                         dback = (struct data_backref *)back;
3760                         if (dback->found_ref != dback->num_refs) {
3761                                 err = 1;
3762                                 if (!print_errs)
3763                                         goto out;
3764                                 fprintf(stderr, "Incorrect local backref count"
3765                                         " on %llu %s %llu owner %llu"
3766                                         " offset %llu found %u wanted %u back %p\n",
3767                                         (unsigned long long)rec->start,
3768                                         back->full_backref ?
3769                                         "parent" : "root",
3770                                         back->full_backref ?
3771                                         (unsigned long long)dback->parent:
3772                                         (unsigned long long)dback->root,
3773                                         (unsigned long long)dback->owner,
3774                                         (unsigned long long)dback->offset,
3775                                         dback->found_ref, dback->num_refs, back);
3776                         }
3777                         if (dback->disk_bytenr != rec->start) {
3778                                 err = 1;
3779                                 if (!print_errs)
3780                                         goto out;
3781                                 fprintf(stderr, "Backref disk bytenr does not"
3782                                         " match extent record, bytenr=%llu, "
3783                                         "ref bytenr=%llu\n",
3784                                         (unsigned long long)rec->start,
3785                                         (unsigned long long)dback->disk_bytenr);
3786                         }
3787
3788                         if (dback->bytes != rec->nr) {
3789                                 err = 1;
3790                                 if (!print_errs)
3791                                         goto out;
3792                                 fprintf(stderr, "Backref bytes do not match "
3793                                         "extent backref, bytenr=%llu, ref "
3794                                         "bytes=%llu, backref bytes=%llu\n",
3795                                         (unsigned long long)rec->start,
3796                                         (unsigned long long)rec->nr,
3797                                         (unsigned long long)dback->bytes);
3798                         }
3799                 }
3800                 if (!back->is_data) {
3801                         found += 1;
3802                 } else {
3803                         dback = (struct data_backref *)back;
3804                         found += dback->found_ref;
3805                 }
3806         }
3807         if (found != rec->refs) {
3808                 err = 1;
3809                 if (!print_errs)
3810                         goto out;
3811                 fprintf(stderr, "Incorrect global backref count "
3812                         "on %llu found %llu wanted %llu\n",
3813                         (unsigned long long)rec->start,
3814                         (unsigned long long)found,
3815                         (unsigned long long)rec->refs);
3816         }
3817 out:
3818         return err;
3819 }
3820
3821 static int free_all_extent_backrefs(struct extent_record *rec)
3822 {
3823         struct extent_backref *back;
3824         struct list_head *cur;
3825         while (!list_empty(&rec->backrefs)) {
3826                 cur = rec->backrefs.next;
3827                 back = list_entry(cur, struct extent_backref, list);
3828                 list_del(cur);
3829                 free(back);
3830         }
3831         return 0;
3832 }
3833
3834 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3835                                      struct cache_tree *extent_cache)
3836 {
3837         struct cache_extent *cache;
3838         struct extent_record *rec;
3839
3840         while (1) {
3841                 cache = first_cache_extent(extent_cache);
3842                 if (!cache)
3843                         break;
3844                 rec = container_of(cache, struct extent_record, cache);
3845                 remove_cache_extent(extent_cache, cache);
3846                 free_all_extent_backrefs(rec);
3847                 free(rec);
3848         }
3849 }
3850
3851 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3852                                  struct extent_record *rec)
3853 {
3854         if (rec->content_checked && rec->owner_ref_checked &&
3855             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3856             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3857             !rec->bad_full_backref && !rec->crossing_stripes &&
3858             !rec->wrong_chunk_type) {
3859                 remove_cache_extent(extent_cache, &rec->cache);
3860                 free_all_extent_backrefs(rec);
3861                 list_del_init(&rec->list);
3862                 free(rec);
3863         }
3864         return 0;
3865 }
3866
3867 static int check_owner_ref(struct btrfs_root *root,
3868                             struct extent_record *rec,
3869                             struct extent_buffer *buf)
3870 {
3871         struct extent_backref *node;
3872         struct tree_backref *back;
3873         struct btrfs_root *ref_root;
3874         struct btrfs_key key;
3875         struct btrfs_path path;
3876         struct extent_buffer *parent;
3877         int level;
3878         int found = 0;
3879         int ret;
3880
3881         list_for_each_entry(node, &rec->backrefs, list) {
3882                 if (node->is_data)
3883                         continue;
3884                 if (!node->found_ref)
3885                         continue;
3886                 if (node->full_backref)
3887                         continue;
3888                 back = (struct tree_backref *)node;
3889                 if (btrfs_header_owner(buf) == back->root)
3890                         return 0;
3891         }
3892         BUG_ON(rec->is_root);
3893
3894         /* try to find the block by search corresponding fs tree */
3895         key.objectid = btrfs_header_owner(buf);
3896         key.type = BTRFS_ROOT_ITEM_KEY;
3897         key.offset = (u64)-1;
3898
3899         ref_root = btrfs_read_fs_root(root->fs_info, &key);
3900         if (IS_ERR(ref_root))
3901                 return 1;
3902
3903         level = btrfs_header_level(buf);
3904         if (level == 0)
3905                 btrfs_item_key_to_cpu(buf, &key, 0);
3906         else
3907                 btrfs_node_key_to_cpu(buf, &key, 0);
3908
3909         btrfs_init_path(&path);
3910         path.lowest_level = level + 1;
3911         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3912         if (ret < 0)
3913                 return 0;
3914
3915         parent = path.nodes[level + 1];
3916         if (parent && buf->start == btrfs_node_blockptr(parent,
3917                                                         path.slots[level + 1]))
3918                 found = 1;
3919
3920         btrfs_release_path(&path);
3921         return found ? 0 : 1;
3922 }
3923
3924 static int is_extent_tree_record(struct extent_record *rec)
3925 {
3926         struct list_head *cur = rec->backrefs.next;
3927         struct extent_backref *node;
3928         struct tree_backref *back;
3929         int is_extent = 0;
3930
3931         while(cur != &rec->backrefs) {
3932                 node = list_entry(cur, struct extent_backref, list);
3933                 cur = cur->next;
3934                 if (node->is_data)
3935                         return 0;
3936                 back = (struct tree_backref *)node;
3937                 if (node->full_backref)
3938                         return 0;
3939                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3940                         is_extent = 1;
3941         }
3942         return is_extent;
3943 }
3944
3945
3946 static int record_bad_block_io(struct btrfs_fs_info *info,
3947                                struct cache_tree *extent_cache,
3948                                u64 start, u64 len)
3949 {
3950         struct extent_record *rec;
3951         struct cache_extent *cache;
3952         struct btrfs_key key;
3953
3954         cache = lookup_cache_extent(extent_cache, start, len);
3955         if (!cache)
3956                 return 0;
3957
3958         rec = container_of(cache, struct extent_record, cache);
3959         if (!is_extent_tree_record(rec))
3960                 return 0;
3961
3962         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3963         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
3964 }
3965
3966 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3967                        struct extent_buffer *buf, int slot)
3968 {
3969         if (btrfs_header_level(buf)) {
3970                 struct btrfs_key_ptr ptr1, ptr2;
3971
3972                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3973                                    sizeof(struct btrfs_key_ptr));
3974                 read_extent_buffer(buf, &ptr2,
3975                                    btrfs_node_key_ptr_offset(slot + 1),
3976                                    sizeof(struct btrfs_key_ptr));
3977                 write_extent_buffer(buf, &ptr1,
3978                                     btrfs_node_key_ptr_offset(slot + 1),
3979                                     sizeof(struct btrfs_key_ptr));
3980                 write_extent_buffer(buf, &ptr2,
3981                                     btrfs_node_key_ptr_offset(slot),
3982                                     sizeof(struct btrfs_key_ptr));
3983                 if (slot == 0) {
3984                         struct btrfs_disk_key key;
3985                         btrfs_node_key(buf, &key, 0);
3986                         btrfs_fixup_low_keys(root, path, &key,
3987                                              btrfs_header_level(buf) + 1);
3988                 }
3989         } else {
3990                 struct btrfs_item *item1, *item2;
3991                 struct btrfs_key k1, k2;
3992                 char *item1_data, *item2_data;
3993                 u32 item1_offset, item2_offset, item1_size, item2_size;
3994
3995                 item1 = btrfs_item_nr(slot);
3996                 item2 = btrfs_item_nr(slot + 1);
3997                 btrfs_item_key_to_cpu(buf, &k1, slot);
3998                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
3999                 item1_offset = btrfs_item_offset(buf, item1);
4000                 item2_offset = btrfs_item_offset(buf, item2);
4001                 item1_size = btrfs_item_size(buf, item1);
4002                 item2_size = btrfs_item_size(buf, item2);
4003
4004                 item1_data = malloc(item1_size);
4005                 if (!item1_data)
4006                         return -ENOMEM;
4007                 item2_data = malloc(item2_size);
4008                 if (!item2_data) {
4009                         free(item1_data);
4010                         return -ENOMEM;
4011                 }
4012
4013                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4014                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4015
4016                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4017                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4018                 free(item1_data);
4019                 free(item2_data);
4020
4021                 btrfs_set_item_offset(buf, item1, item2_offset);
4022                 btrfs_set_item_offset(buf, item2, item1_offset);
4023                 btrfs_set_item_size(buf, item1, item2_size);
4024                 btrfs_set_item_size(buf, item2, item1_size);
4025
4026                 path->slots[0] = slot;
4027                 btrfs_set_item_key_unsafe(root, path, &k2);
4028                 path->slots[0] = slot + 1;
4029                 btrfs_set_item_key_unsafe(root, path, &k1);
4030         }
4031         return 0;
4032 }
4033
4034 static int fix_key_order(struct btrfs_trans_handle *trans,
4035                          struct btrfs_root *root,
4036                          struct btrfs_path *path)
4037 {
4038         struct extent_buffer *buf;
4039         struct btrfs_key k1, k2;
4040         int i;
4041         int level = path->lowest_level;
4042         int ret = -EIO;
4043
4044         buf = path->nodes[level];
4045         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4046                 if (level) {
4047                         btrfs_node_key_to_cpu(buf, &k1, i);
4048                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4049                 } else {
4050                         btrfs_item_key_to_cpu(buf, &k1, i);
4051                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4052                 }
4053                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4054                         continue;
4055                 ret = swap_values(root, path, buf, i);
4056                 if (ret)
4057                         break;
4058                 btrfs_mark_buffer_dirty(buf);
4059                 i = 0;
4060         }
4061         return ret;
4062 }
4063
4064 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4065                              struct btrfs_root *root,
4066                              struct btrfs_path *path,
4067                              struct extent_buffer *buf, int slot)
4068 {
4069         struct btrfs_key key;
4070         int nritems = btrfs_header_nritems(buf);
4071
4072         btrfs_item_key_to_cpu(buf, &key, slot);
4073
4074         /* These are all the keys we can deal with missing. */
4075         if (key.type != BTRFS_DIR_INDEX_KEY &&
4076             key.type != BTRFS_EXTENT_ITEM_KEY &&
4077             key.type != BTRFS_METADATA_ITEM_KEY &&
4078             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4079             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4080                 return -1;
4081
4082         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4083                (unsigned long long)key.objectid, key.type,
4084                (unsigned long long)key.offset, slot, buf->start);
4085         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4086                               btrfs_item_nr_offset(slot + 1),
4087                               sizeof(struct btrfs_item) *
4088                               (nritems - slot - 1));
4089         btrfs_set_header_nritems(buf, nritems - 1);
4090         if (slot == 0) {
4091                 struct btrfs_disk_key disk_key;
4092
4093                 btrfs_item_key(buf, &disk_key, 0);
4094                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4095         }
4096         btrfs_mark_buffer_dirty(buf);
4097         return 0;
4098 }
4099
4100 static int fix_item_offset(struct btrfs_trans_handle *trans,
4101                            struct btrfs_root *root,
4102                            struct btrfs_path *path)
4103 {
4104         struct extent_buffer *buf;
4105         int i;
4106         int ret = 0;
4107
4108         /* We should only get this for leaves */
4109         BUG_ON(path->lowest_level);
4110         buf = path->nodes[0];
4111 again:
4112         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4113                 unsigned int shift = 0, offset;
4114
4115                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4116                     BTRFS_LEAF_DATA_SIZE(root)) {
4117                         if (btrfs_item_end_nr(buf, i) >
4118                             BTRFS_LEAF_DATA_SIZE(root)) {
4119                                 ret = delete_bogus_item(trans, root, path,
4120                                                         buf, i);
4121                                 if (!ret)
4122                                         goto again;
4123                                 fprintf(stderr, "item is off the end of the "
4124                                         "leaf, can't fix\n");
4125                                 ret = -EIO;
4126                                 break;
4127                         }
4128                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4129                                 btrfs_item_end_nr(buf, i);
4130                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4131                            btrfs_item_offset_nr(buf, i - 1)) {
4132                         if (btrfs_item_end_nr(buf, i) >
4133                             btrfs_item_offset_nr(buf, i - 1)) {
4134                                 ret = delete_bogus_item(trans, root, path,
4135                                                         buf, i);
4136                                 if (!ret)
4137                                         goto again;
4138                                 fprintf(stderr, "items overlap, can't fix\n");
4139                                 ret = -EIO;
4140                                 break;
4141                         }
4142                         shift = btrfs_item_offset_nr(buf, i - 1) -
4143                                 btrfs_item_end_nr(buf, i);
4144                 }
4145                 if (!shift)
4146                         continue;
4147
4148                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4149                        i, shift, (unsigned long long)buf->start);
4150                 offset = btrfs_item_offset_nr(buf, i);
4151                 memmove_extent_buffer(buf,
4152                                       btrfs_leaf_data(buf) + offset + shift,
4153                                       btrfs_leaf_data(buf) + offset,
4154                                       btrfs_item_size_nr(buf, i));
4155                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4156                                       offset + shift);
4157                 btrfs_mark_buffer_dirty(buf);
4158         }
4159
4160         /*
4161          * We may have moved things, in which case we want to exit so we don't
4162          * write those changes out.  Once we have proper abort functionality in
4163          * progs this can be changed to something nicer.
4164          */
4165         BUG_ON(ret);
4166         return ret;
4167 }
4168
4169 /*
4170  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4171  * then just return -EIO.
4172  */
4173 static int try_to_fix_bad_block(struct btrfs_root *root,
4174                                 struct extent_buffer *buf,
4175                                 enum btrfs_tree_block_status status)
4176 {
4177         struct btrfs_trans_handle *trans;
4178         struct ulist *roots;
4179         struct ulist_node *node;
4180         struct btrfs_root *search_root;
4181         struct btrfs_path *path;
4182         struct ulist_iterator iter;
4183         struct btrfs_key root_key, key;
4184         int ret;
4185
4186         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4187             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4188                 return -EIO;
4189
4190         path = btrfs_alloc_path();
4191         if (!path)
4192                 return -EIO;
4193
4194         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4195                                    0, &roots);
4196         if (ret) {
4197                 btrfs_free_path(path);
4198                 return -EIO;
4199         }
4200
4201         ULIST_ITER_INIT(&iter);
4202         while ((node = ulist_next(roots, &iter))) {
4203                 root_key.objectid = node->val;
4204                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4205                 root_key.offset = (u64)-1;
4206
4207                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4208                 if (IS_ERR(root)) {
4209                         ret = -EIO;
4210                         break;
4211                 }
4212
4213
4214                 trans = btrfs_start_transaction(search_root, 0);
4215                 if (IS_ERR(trans)) {
4216                         ret = PTR_ERR(trans);
4217                         break;
4218                 }
4219
4220                 path->lowest_level = btrfs_header_level(buf);
4221                 path->skip_check_block = 1;
4222                 if (path->lowest_level)
4223                         btrfs_node_key_to_cpu(buf, &key, 0);
4224                 else
4225                         btrfs_item_key_to_cpu(buf, &key, 0);
4226                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4227                 if (ret) {
4228                         ret = -EIO;
4229                         btrfs_commit_transaction(trans, search_root);
4230                         break;
4231                 }
4232                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4233                         ret = fix_key_order(trans, search_root, path);
4234                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4235                         ret = fix_item_offset(trans, search_root, path);
4236                 if (ret) {
4237                         btrfs_commit_transaction(trans, search_root);
4238                         break;
4239                 }
4240                 btrfs_release_path(path);
4241                 btrfs_commit_transaction(trans, search_root);
4242         }
4243         ulist_free(roots);
4244         btrfs_free_path(path);
4245         return ret;
4246 }
4247
4248 static int check_block(struct btrfs_root *root,
4249                        struct cache_tree *extent_cache,
4250                        struct extent_buffer *buf, u64 flags)
4251 {
4252         struct extent_record *rec;
4253         struct cache_extent *cache;
4254         struct btrfs_key key;
4255         enum btrfs_tree_block_status status;
4256         int ret = 0;
4257         int level;
4258
4259         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4260         if (!cache)
4261                 return 1;
4262         rec = container_of(cache, struct extent_record, cache);
4263         rec->generation = btrfs_header_generation(buf);
4264
4265         level = btrfs_header_level(buf);
4266         if (btrfs_header_nritems(buf) > 0) {
4267
4268                 if (level == 0)
4269                         btrfs_item_key_to_cpu(buf, &key, 0);
4270                 else
4271                         btrfs_node_key_to_cpu(buf, &key, 0);
4272
4273                 rec->info_objectid = key.objectid;
4274         }
4275         rec->info_level = level;
4276
4277         if (btrfs_is_leaf(buf))
4278                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4279         else
4280                 status = btrfs_check_node(root, &rec->parent_key, buf);
4281
4282         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4283                 if (repair)
4284                         status = try_to_fix_bad_block(root, buf, status);
4285                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4286                         ret = -EIO;
4287                         fprintf(stderr, "bad block %llu\n",
4288                                 (unsigned long long)buf->start);
4289                 } else {
4290                         /*
4291                          * Signal to callers we need to start the scan over
4292                          * again since we'll have cow'ed blocks.
4293                          */
4294                         ret = -EAGAIN;
4295                 }
4296         } else {
4297                 rec->content_checked = 1;
4298                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4299                         rec->owner_ref_checked = 1;
4300                 else {
4301                         ret = check_owner_ref(root, rec, buf);
4302                         if (!ret)
4303                                 rec->owner_ref_checked = 1;
4304                 }
4305         }
4306         if (!ret)
4307                 maybe_free_extent_rec(extent_cache, rec);
4308         return ret;
4309 }
4310
4311 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4312                                                 u64 parent, u64 root)
4313 {
4314         struct list_head *cur = rec->backrefs.next;
4315         struct extent_backref *node;
4316         struct tree_backref *back;
4317
4318         while(cur != &rec->backrefs) {
4319                 node = list_entry(cur, struct extent_backref, list);
4320                 cur = cur->next;
4321                 if (node->is_data)
4322                         continue;
4323                 back = (struct tree_backref *)node;
4324                 if (parent > 0) {
4325                         if (!node->full_backref)
4326                                 continue;
4327                         if (parent == back->parent)
4328                                 return back;
4329                 } else {
4330                         if (node->full_backref)
4331                                 continue;
4332                         if (back->root == root)
4333                                 return back;
4334                 }
4335         }
4336         return NULL;
4337 }
4338
4339 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4340                                                 u64 parent, u64 root)
4341 {
4342         struct tree_backref *ref = malloc(sizeof(*ref));
4343         memset(&ref->node, 0, sizeof(ref->node));
4344         if (parent > 0) {
4345                 ref->parent = parent;
4346                 ref->node.full_backref = 1;
4347         } else {
4348                 ref->root = root;
4349                 ref->node.full_backref = 0;
4350         }
4351         list_add_tail(&ref->node.list, &rec->backrefs);
4352
4353         return ref;
4354 }
4355
4356 static struct data_backref *find_data_backref(struct extent_record *rec,
4357                                                 u64 parent, u64 root,
4358                                                 u64 owner, u64 offset,
4359                                                 int found_ref,
4360                                                 u64 disk_bytenr, u64 bytes)
4361 {
4362         struct list_head *cur = rec->backrefs.next;
4363         struct extent_backref *node;
4364         struct data_backref *back;
4365
4366         while(cur != &rec->backrefs) {
4367                 node = list_entry(cur, struct extent_backref, list);
4368                 cur = cur->next;
4369                 if (!node->is_data)
4370                         continue;
4371                 back = (struct data_backref *)node;
4372                 if (parent > 0) {
4373                         if (!node->full_backref)
4374                                 continue;
4375                         if (parent == back->parent)
4376                                 return back;
4377                 } else {
4378                         if (node->full_backref)
4379                                 continue;
4380                         if (back->root == root && back->owner == owner &&
4381                             back->offset == offset) {
4382                                 if (found_ref && node->found_ref &&
4383                                     (back->bytes != bytes ||
4384                                     back->disk_bytenr != disk_bytenr))
4385                                         continue;
4386                                 return back;
4387                         }
4388                 }
4389         }
4390         return NULL;
4391 }
4392
4393 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4394                                                 u64 parent, u64 root,
4395                                                 u64 owner, u64 offset,
4396                                                 u64 max_size)
4397 {
4398         struct data_backref *ref = malloc(sizeof(*ref));
4399         memset(&ref->node, 0, sizeof(ref->node));
4400         ref->node.is_data = 1;
4401
4402         if (parent > 0) {
4403                 ref->parent = parent;
4404                 ref->owner = 0;
4405                 ref->offset = 0;
4406                 ref->node.full_backref = 1;
4407         } else {
4408                 ref->root = root;
4409                 ref->owner = owner;
4410                 ref->offset = offset;
4411                 ref->node.full_backref = 0;
4412         }
4413         ref->bytes = max_size;
4414         ref->found_ref = 0;
4415         ref->num_refs = 0;
4416         list_add_tail(&ref->node.list, &rec->backrefs);
4417         if (max_size > rec->max_size)
4418                 rec->max_size = max_size;
4419         return ref;
4420 }
4421
4422 /* Check if the type of extent matches with its chunk */
4423 static void check_extent_type(struct extent_record *rec)
4424 {
4425         struct btrfs_block_group_cache *bg_cache;
4426
4427         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4428         if (!bg_cache)
4429                 return;
4430
4431         /* data extent, check chunk directly*/
4432         if (!rec->metadata) {
4433                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4434                         rec->wrong_chunk_type = 1;
4435                 return;
4436         }
4437
4438         /* metadata extent, check the obvious case first */
4439         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4440                                  BTRFS_BLOCK_GROUP_METADATA))) {
4441                 rec->wrong_chunk_type = 1;
4442                 return;
4443         }
4444
4445         /*
4446          * Check SYSTEM extent, as it's also marked as metadata, we can only
4447          * make sure it's a SYSTEM extent by its backref
4448          */
4449         if (!list_empty(&rec->backrefs)) {
4450                 struct extent_backref *node;
4451                 struct tree_backref *tback;
4452                 u64 bg_type;
4453
4454                 node = list_entry(rec->backrefs.next, struct extent_backref,
4455                                   list);
4456                 if (node->is_data) {
4457                         /* tree block shouldn't have data backref */
4458                         rec->wrong_chunk_type = 1;
4459                         return;
4460                 }
4461                 tback = container_of(node, struct tree_backref, node);
4462
4463                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4464                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4465                 else
4466                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4467                 if (!(bg_cache->flags & bg_type))
4468                         rec->wrong_chunk_type = 1;
4469         }
4470 }
4471
4472 static int add_extent_rec(struct cache_tree *extent_cache,
4473                           struct btrfs_key *parent_key, u64 parent_gen,
4474                           u64 start, u64 nr, u64 extent_item_refs,
4475                           int is_root, int inc_ref, int set_checked,
4476                           int metadata, int extent_rec, u64 max_size)
4477 {
4478         struct extent_record *rec;
4479         struct cache_extent *cache;
4480         int ret = 0;
4481         int dup = 0;
4482
4483         cache = lookup_cache_extent(extent_cache, start, nr);
4484         if (cache) {
4485                 rec = container_of(cache, struct extent_record, cache);
4486                 if (inc_ref)
4487                         rec->refs++;
4488                 if (rec->nr == 1)
4489                         rec->nr = max(nr, max_size);
4490
4491                 /*
4492                  * We need to make sure to reset nr to whatever the extent
4493                  * record says was the real size, this way we can compare it to
4494                  * the backrefs.
4495                  */
4496                 if (extent_rec) {
4497                         if (start != rec->start || rec->found_rec) {
4498                                 struct extent_record *tmp;
4499
4500                                 dup = 1;
4501                                 if (list_empty(&rec->list))
4502                                         list_add_tail(&rec->list,
4503                                                       &duplicate_extents);
4504
4505                                 /*
4506                                  * We have to do this song and dance in case we
4507                                  * find an extent record that falls inside of
4508                                  * our current extent record but does not have
4509                                  * the same objectid.
4510                                  */
4511                                 tmp = malloc(sizeof(*tmp));
4512                                 if (!tmp)
4513                                         return -ENOMEM;
4514                                 tmp->start = start;
4515                                 tmp->max_size = max_size;
4516                                 tmp->nr = nr;
4517                                 tmp->found_rec = 1;
4518                                 tmp->metadata = metadata;
4519                                 tmp->extent_item_refs = extent_item_refs;
4520                                 INIT_LIST_HEAD(&tmp->list);
4521                                 list_add_tail(&tmp->list, &rec->dups);
4522                                 rec->num_duplicates++;
4523                         } else {
4524                                 rec->nr = nr;
4525                                 rec->found_rec = 1;
4526                         }
4527                 }
4528
4529                 if (extent_item_refs && !dup) {
4530                         if (rec->extent_item_refs) {
4531                                 fprintf(stderr, "block %llu rec "
4532                                         "extent_item_refs %llu, passed %llu\n",
4533                                         (unsigned long long)start,
4534                                         (unsigned long long)
4535                                                         rec->extent_item_refs,
4536                                         (unsigned long long)extent_item_refs);
4537                         }
4538                         rec->extent_item_refs = extent_item_refs;
4539                 }
4540                 if (is_root)
4541                         rec->is_root = 1;
4542                 if (set_checked) {
4543                         rec->content_checked = 1;
4544                         rec->owner_ref_checked = 1;
4545                 }
4546
4547                 if (parent_key)
4548                         btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4549                 if (parent_gen)
4550                         rec->parent_generation = parent_gen;
4551
4552                 if (rec->max_size < max_size)
4553                         rec->max_size = max_size;
4554
4555                 /*
4556                  * A metadata extent can't cross stripe_len boundary, otherwise
4557                  * kernel scrub won't be able to handle it.
4558                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4559                  * it.
4560                  */
4561                 if (metadata && check_crossing_stripes(rec->start,
4562                                                        rec->max_size))
4563                                 rec->crossing_stripes = 1;
4564                 check_extent_type(rec);
4565                 maybe_free_extent_rec(extent_cache, rec);
4566                 return ret;
4567         }
4568         rec = malloc(sizeof(*rec));
4569         rec->start = start;
4570         rec->max_size = max_size;
4571         rec->nr = max(nr, max_size);
4572         rec->found_rec = !!extent_rec;
4573         rec->content_checked = 0;
4574         rec->owner_ref_checked = 0;
4575         rec->num_duplicates = 0;
4576         rec->metadata = metadata;
4577         rec->flag_block_full_backref = -1;
4578         rec->bad_full_backref = 0;
4579         rec->crossing_stripes = 0;
4580         rec->wrong_chunk_type = 0;
4581         INIT_LIST_HEAD(&rec->backrefs);
4582         INIT_LIST_HEAD(&rec->dups);
4583         INIT_LIST_HEAD(&rec->list);
4584
4585         if (is_root)
4586                 rec->is_root = 1;
4587         else
4588                 rec->is_root = 0;
4589
4590         if (inc_ref)
4591                 rec->refs = 1;
4592         else
4593                 rec->refs = 0;
4594
4595         if (extent_item_refs)
4596                 rec->extent_item_refs = extent_item_refs;
4597         else
4598                 rec->extent_item_refs = 0;
4599
4600         if (parent_key)
4601                 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4602         else
4603                 memset(&rec->parent_key, 0, sizeof(*parent_key));
4604
4605         if (parent_gen)
4606                 rec->parent_generation = parent_gen;
4607         else
4608                 rec->parent_generation = 0;
4609
4610         rec->cache.start = start;
4611         rec->cache.size = nr;
4612         ret = insert_cache_extent(extent_cache, &rec->cache);
4613         BUG_ON(ret);
4614         bytes_used += nr;
4615         if (set_checked) {
4616                 rec->content_checked = 1;
4617                 rec->owner_ref_checked = 1;
4618         }
4619
4620         if (metadata)
4621                 if (check_crossing_stripes(rec->start, rec->max_size))
4622                         rec->crossing_stripes = 1;
4623         check_extent_type(rec);
4624         return ret;
4625 }
4626
4627 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4628                             u64 parent, u64 root, int found_ref)
4629 {
4630         struct extent_record *rec;
4631         struct tree_backref *back;
4632         struct cache_extent *cache;
4633
4634         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4635         if (!cache) {
4636                 add_extent_rec(extent_cache, NULL, 0, bytenr,
4637                                1, 0, 0, 0, 0, 1, 0, 0);
4638                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4639                 if (!cache)
4640                         abort();
4641         }
4642
4643         rec = container_of(cache, struct extent_record, cache);
4644         if (rec->start != bytenr) {
4645                 abort();
4646         }
4647
4648         back = find_tree_backref(rec, parent, root);
4649         if (!back)
4650                 back = alloc_tree_backref(rec, parent, root);
4651
4652         if (found_ref) {
4653                 if (back->node.found_ref) {
4654                         fprintf(stderr, "Extent back ref already exists "
4655                                 "for %llu parent %llu root %llu \n",
4656                                 (unsigned long long)bytenr,
4657                                 (unsigned long long)parent,
4658                                 (unsigned long long)root);
4659                 }
4660                 back->node.found_ref = 1;
4661         } else {
4662                 if (back->node.found_extent_tree) {
4663                         fprintf(stderr, "Extent back ref already exists "
4664                                 "for %llu parent %llu root %llu \n",
4665                                 (unsigned long long)bytenr,
4666                                 (unsigned long long)parent,
4667                                 (unsigned long long)root);
4668                 }
4669                 back->node.found_extent_tree = 1;
4670         }
4671         check_extent_type(rec);
4672         maybe_free_extent_rec(extent_cache, rec);
4673         return 0;
4674 }
4675
4676 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4677                             u64 parent, u64 root, u64 owner, u64 offset,
4678                             u32 num_refs, int found_ref, u64 max_size)
4679 {
4680         struct extent_record *rec;
4681         struct data_backref *back;
4682         struct cache_extent *cache;
4683
4684         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4685         if (!cache) {
4686                 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
4687                                0, 0, max_size);
4688                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4689                 if (!cache)
4690                         abort();
4691         }
4692
4693         rec = container_of(cache, struct extent_record, cache);
4694         if (rec->max_size < max_size)
4695                 rec->max_size = max_size;
4696
4697         /*
4698          * If found_ref is set then max_size is the real size and must match the
4699          * existing refs.  So if we have already found a ref then we need to
4700          * make sure that this ref matches the existing one, otherwise we need
4701          * to add a new backref so we can notice that the backrefs don't match
4702          * and we need to figure out who is telling the truth.  This is to
4703          * account for that awful fsync bug I introduced where we'd end up with
4704          * a btrfs_file_extent_item that would have its length include multiple
4705          * prealloc extents or point inside of a prealloc extent.
4706          */
4707         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4708                                  bytenr, max_size);
4709         if (!back)
4710                 back = alloc_data_backref(rec, parent, root, owner, offset,
4711                                           max_size);
4712
4713         if (found_ref) {
4714                 BUG_ON(num_refs != 1);
4715                 if (back->node.found_ref)
4716                         BUG_ON(back->bytes != max_size);
4717                 back->node.found_ref = 1;
4718                 back->found_ref += 1;
4719                 back->bytes = max_size;
4720                 back->disk_bytenr = bytenr;
4721                 rec->refs += 1;
4722                 rec->content_checked = 1;
4723                 rec->owner_ref_checked = 1;
4724         } else {
4725                 if (back->node.found_extent_tree) {
4726                         fprintf(stderr, "Extent back ref already exists "
4727                                 "for %llu parent %llu root %llu "
4728                                 "owner %llu offset %llu num_refs %lu\n",
4729                                 (unsigned long long)bytenr,
4730                                 (unsigned long long)parent,
4731                                 (unsigned long long)root,
4732                                 (unsigned long long)owner,
4733                                 (unsigned long long)offset,
4734                                 (unsigned long)num_refs);
4735                 }
4736                 back->num_refs = num_refs;
4737                 back->node.found_extent_tree = 1;
4738         }
4739         maybe_free_extent_rec(extent_cache, rec);
4740         return 0;
4741 }
4742
4743 static int add_pending(struct cache_tree *pending,
4744                        struct cache_tree *seen, u64 bytenr, u32 size)
4745 {
4746         int ret;
4747         ret = add_cache_extent(seen, bytenr, size);
4748         if (ret)
4749                 return ret;
4750         add_cache_extent(pending, bytenr, size);
4751         return 0;
4752 }
4753
4754 static int pick_next_pending(struct cache_tree *pending,
4755                         struct cache_tree *reada,
4756                         struct cache_tree *nodes,
4757                         u64 last, struct block_info *bits, int bits_nr,
4758                         int *reada_bits)
4759 {
4760         unsigned long node_start = last;
4761         struct cache_extent *cache;
4762         int ret;
4763
4764         cache = search_cache_extent(reada, 0);
4765         if (cache) {
4766                 bits[0].start = cache->start;
4767                 bits[0].size = cache->size;
4768                 *reada_bits = 1;
4769                 return 1;
4770         }
4771         *reada_bits = 0;
4772         if (node_start > 32768)
4773                 node_start -= 32768;
4774
4775         cache = search_cache_extent(nodes, node_start);
4776         if (!cache)
4777                 cache = search_cache_extent(nodes, 0);
4778
4779         if (!cache) {
4780                  cache = search_cache_extent(pending, 0);
4781                  if (!cache)
4782                          return 0;
4783                  ret = 0;
4784                  do {
4785                          bits[ret].start = cache->start;
4786                          bits[ret].size = cache->size;
4787                          cache = next_cache_extent(cache);
4788                          ret++;
4789                  } while (cache && ret < bits_nr);
4790                  return ret;
4791         }
4792
4793         ret = 0;
4794         do {
4795                 bits[ret].start = cache->start;
4796                 bits[ret].size = cache->size;
4797                 cache = next_cache_extent(cache);
4798                 ret++;
4799         } while (cache && ret < bits_nr);
4800
4801         if (bits_nr - ret > 8) {
4802                 u64 lookup = bits[0].start + bits[0].size;
4803                 struct cache_extent *next;
4804                 next = search_cache_extent(pending, lookup);
4805                 while(next) {
4806                         if (next->start - lookup > 32768)
4807                                 break;
4808                         bits[ret].start = next->start;
4809                         bits[ret].size = next->size;
4810                         lookup = next->start + next->size;
4811                         ret++;
4812                         if (ret == bits_nr)
4813                                 break;
4814                         next = next_cache_extent(next);
4815                         if (!next)
4816                                 break;
4817                 }
4818         }
4819         return ret;
4820 }
4821
4822 static void free_chunk_record(struct cache_extent *cache)
4823 {
4824         struct chunk_record *rec;
4825
4826         rec = container_of(cache, struct chunk_record, cache);
4827         list_del_init(&rec->list);
4828         list_del_init(&rec->dextents);
4829         free(rec);
4830 }
4831
4832 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4833 {
4834         cache_tree_free_extents(chunk_cache, free_chunk_record);
4835 }
4836
4837 static void free_device_record(struct rb_node *node)
4838 {
4839         struct device_record *rec;
4840
4841         rec = container_of(node, struct device_record, node);
4842         free(rec);
4843 }
4844
4845 FREE_RB_BASED_TREE(device_cache, free_device_record);
4846
4847 int insert_block_group_record(struct block_group_tree *tree,
4848                               struct block_group_record *bg_rec)
4849 {
4850         int ret;
4851
4852         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4853         if (ret)
4854                 return ret;
4855
4856         list_add_tail(&bg_rec->list, &tree->block_groups);
4857         return 0;
4858 }
4859
4860 static void free_block_group_record(struct cache_extent *cache)
4861 {
4862         struct block_group_record *rec;
4863
4864         rec = container_of(cache, struct block_group_record, cache);
4865         list_del_init(&rec->list);
4866         free(rec);
4867 }
4868
4869 void free_block_group_tree(struct block_group_tree *tree)
4870 {
4871         cache_tree_free_extents(&tree->tree, free_block_group_record);
4872 }
4873
4874 int insert_device_extent_record(struct device_extent_tree *tree,
4875                                 struct device_extent_record *de_rec)
4876 {
4877         int ret;
4878
4879         /*
4880          * Device extent is a bit different from the other extents, because
4881          * the extents which belong to the different devices may have the
4882          * same start and size, so we need use the special extent cache
4883          * search/insert functions.
4884          */
4885         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4886         if (ret)
4887                 return ret;
4888
4889         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4890         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4891         return 0;
4892 }
4893
4894 static void free_device_extent_record(struct cache_extent *cache)
4895 {
4896         struct device_extent_record *rec;
4897
4898         rec = container_of(cache, struct device_extent_record, cache);
4899         if (!list_empty(&rec->chunk_list))
4900                 list_del_init(&rec->chunk_list);
4901         if (!list_empty(&rec->device_list))
4902                 list_del_init(&rec->device_list);
4903         free(rec);
4904 }
4905
4906 void free_device_extent_tree(struct device_extent_tree *tree)
4907 {
4908         cache_tree_free_extents(&tree->tree, free_device_extent_record);
4909 }
4910
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent reference item at @slot of @leaf into a backref in
 * @extent_cache.
 *
 * A reference whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded
 * as a tree backref, any other as a data backref carrying the item's
 * reference count.  In both cases the item key's objectid is the extent
 * bytenr and its offset is the parent.
 *
 * Returns the result of add_tree_backref() / add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid,
					key.offset, 0, 0);

	return add_data_backref(extent_cache, key.objectid, key.offset, 0,
				0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
4929
4930 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4931                                             struct btrfs_key *key,
4932                                             int slot)
4933 {
4934         struct btrfs_chunk *ptr;
4935         struct chunk_record *rec;
4936         int num_stripes, i;
4937
4938         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4939         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4940
4941         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4942         if (!rec) {
4943                 fprintf(stderr, "memory allocation failed\n");
4944                 exit(-1);
4945         }
4946
4947         INIT_LIST_HEAD(&rec->list);
4948         INIT_LIST_HEAD(&rec->dextents);
4949         rec->bg_rec = NULL;
4950
4951         rec->cache.start = key->offset;
4952         rec->cache.size = btrfs_chunk_length(leaf, ptr);
4953
4954         rec->generation = btrfs_header_generation(leaf);
4955
4956         rec->objectid = key->objectid;
4957         rec->type = key->type;
4958         rec->offset = key->offset;
4959
4960         rec->length = rec->cache.size;
4961         rec->owner = btrfs_chunk_owner(leaf, ptr);
4962         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4963         rec->type_flags = btrfs_chunk_type(leaf, ptr);
4964         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4965         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4966         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4967         rec->num_stripes = num_stripes;
4968         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4969
4970         for (i = 0; i < rec->num_stripes; ++i) {
4971                 rec->stripes[i].devid =
4972                         btrfs_stripe_devid_nr(leaf, ptr, i);
4973                 rec->stripes[i].offset =
4974                         btrfs_stripe_offset_nr(leaf, ptr, i);
4975                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4976                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
4977                                 BTRFS_UUID_SIZE);
4978         }
4979
4980         return rec;
4981 }
4982
4983 static int process_chunk_item(struct cache_tree *chunk_cache,
4984                               struct btrfs_key *key, struct extent_buffer *eb,
4985                               int slot)
4986 {
4987         struct chunk_record *rec;
4988         int ret = 0;
4989
4990         rec = btrfs_new_chunk_record(eb, key, slot);
4991         ret = insert_cache_extent(chunk_cache, &rec->cache);
4992         if (ret) {
4993                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
4994                         rec->offset, rec->length);
4995                 free(rec);
4996         }
4997
4998         return ret;
4999 }
5000
5001 static int process_device_item(struct rb_root *dev_cache,
5002                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5003 {
5004         struct btrfs_dev_item *ptr;
5005         struct device_record *rec;
5006         int ret = 0;
5007
5008         ptr = btrfs_item_ptr(eb,
5009                 slot, struct btrfs_dev_item);
5010
5011         rec = malloc(sizeof(*rec));
5012         if (!rec) {
5013                 fprintf(stderr, "memory allocation failed\n");
5014                 return -ENOMEM;
5015         }
5016
5017         rec->devid = key->offset;
5018         rec->generation = btrfs_header_generation(eb);
5019
5020         rec->objectid = key->objectid;
5021         rec->type = key->type;
5022         rec->offset = key->offset;
5023
5024         rec->devid = btrfs_device_id(eb, ptr);
5025         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5026         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5027
5028         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5029         if (ret) {
5030                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5031                 free(rec);
5032         }
5033
5034         return ret;
5035 }
5036
5037 struct block_group_record *
5038 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5039                              int slot)
5040 {
5041         struct btrfs_block_group_item *ptr;
5042         struct block_group_record *rec;
5043
5044         rec = calloc(1, sizeof(*rec));
5045         if (!rec) {
5046                 fprintf(stderr, "memory allocation failed\n");
5047                 exit(-1);
5048         }
5049
5050         rec->cache.start = key->objectid;
5051         rec->cache.size = key->offset;
5052
5053         rec->generation = btrfs_header_generation(leaf);
5054
5055         rec->objectid = key->objectid;
5056         rec->type = key->type;
5057         rec->offset = key->offset;
5058
5059         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5060         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5061
5062         INIT_LIST_HEAD(&rec->list);
5063
5064         return rec;
5065 }
5066
5067 static int process_block_group_item(struct block_group_tree *block_group_cache,
5068                                     struct btrfs_key *key,
5069                                     struct extent_buffer *eb, int slot)
5070 {
5071         struct block_group_record *rec;
5072         int ret = 0;
5073
5074         rec = btrfs_new_block_group_record(eb, key, slot);
5075         ret = insert_block_group_record(block_group_cache, rec);
5076         if (ret) {
5077                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5078                         rec->objectid, rec->offset);
5079                 free(rec);
5080         }
5081
5082         return ret;
5083 }
5084
5085 struct device_extent_record *
5086 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5087                                struct btrfs_key *key, int slot)
5088 {
5089         struct device_extent_record *rec;
5090         struct btrfs_dev_extent *ptr;
5091
5092         rec = calloc(1, sizeof(*rec));
5093         if (!rec) {
5094                 fprintf(stderr, "memory allocation failed\n");
5095                 exit(-1);
5096         }
5097
5098         rec->cache.objectid = key->objectid;
5099         rec->cache.start = key->offset;
5100
5101         rec->generation = btrfs_header_generation(leaf);
5102
5103         rec->objectid = key->objectid;
5104         rec->type = key->type;
5105         rec->offset = key->offset;
5106
5107         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5108         rec->chunk_objecteid =
5109                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5110         rec->chunk_offset =
5111                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5112         rec->length = btrfs_dev_extent_length(leaf, ptr);
5113         rec->cache.size = rec->length;
5114
5115         INIT_LIST_HEAD(&rec->chunk_list);
5116         INIT_LIST_HEAD(&rec->device_list);
5117
5118         return rec;
5119 }
5120
5121 static int
5122 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5123                            struct btrfs_key *key, struct extent_buffer *eb,
5124                            int slot)
5125 {
5126         struct device_extent_record *rec;
5127         int ret;
5128
5129         rec = btrfs_new_device_extent_record(eb, key, slot);
5130         ret = insert_device_extent_record(dev_extent_cache, rec);
5131         if (ret) {
5132                 fprintf(stderr,
5133                         "Device extent[%llu, %llu, %llu] existed.\n",
5134                         rec->objectid, rec->offset, rec->length);
5135                 free(rec);
5136         }
5137
5138         return ret;
5139 }
5140
5141 static int process_extent_item(struct btrfs_root *root,
5142                                struct cache_tree *extent_cache,
5143                                struct extent_buffer *eb, int slot)
5144 {
5145         struct btrfs_extent_item *ei;
5146         struct btrfs_extent_inline_ref *iref;
5147         struct btrfs_extent_data_ref *dref;
5148         struct btrfs_shared_data_ref *sref;
5149         struct btrfs_key key;
5150         unsigned long end;
5151         unsigned long ptr;
5152         int type;
5153         u32 item_size = btrfs_item_size_nr(eb, slot);
5154         u64 refs = 0;
5155         u64 offset;
5156         u64 num_bytes;
5157         int metadata = 0;
5158
5159         btrfs_item_key_to_cpu(eb, &key, slot);
5160
5161         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5162                 metadata = 1;
5163                 num_bytes = root->leafsize;
5164         } else {
5165                 num_bytes = key.offset;
5166         }
5167
5168         if (item_size < sizeof(*ei)) {
5169 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5170                 struct btrfs_extent_item_v0 *ei0;
5171                 BUG_ON(item_size != sizeof(*ei0));
5172                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5173                 refs = btrfs_extent_refs_v0(eb, ei0);
5174 #else
5175                 BUG();
5176 #endif
5177                 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5178                                       num_bytes, refs, 0, 0, 0, metadata, 1,
5179                                       num_bytes);
5180         }
5181
5182         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5183         refs = btrfs_extent_refs(eb, ei);
5184         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5185                 metadata = 1;
5186         else
5187                 metadata = 0;
5188
5189         add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5190                        refs, 0, 0, 0, metadata, 1, num_bytes);
5191
5192         ptr = (unsigned long)(ei + 1);
5193         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5194             key.type == BTRFS_EXTENT_ITEM_KEY)
5195                 ptr += sizeof(struct btrfs_tree_block_info);
5196
5197         end = (unsigned long)ei + item_size;
5198         while (ptr < end) {
5199                 iref = (struct btrfs_extent_inline_ref *)ptr;
5200                 type = btrfs_extent_inline_ref_type(eb, iref);
5201                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5202                 switch (type) {
5203                 case BTRFS_TREE_BLOCK_REF_KEY:
5204                         add_tree_backref(extent_cache, key.objectid,
5205                                          0, offset, 0);
5206                         break;
5207                 case BTRFS_SHARED_BLOCK_REF_KEY:
5208                         add_tree_backref(extent_cache, key.objectid,
5209                                          offset, 0, 0);
5210                         break;
5211                 case BTRFS_EXTENT_DATA_REF_KEY:
5212                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5213                         add_data_backref(extent_cache, key.objectid, 0,
5214                                         btrfs_extent_data_ref_root(eb, dref),
5215                                         btrfs_extent_data_ref_objectid(eb,
5216                                                                        dref),
5217                                         btrfs_extent_data_ref_offset(eb, dref),
5218                                         btrfs_extent_data_ref_count(eb, dref),
5219                                         0, num_bytes);
5220                         break;
5221                 case BTRFS_SHARED_DATA_REF_KEY:
5222                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5223                         add_data_backref(extent_cache, key.objectid, offset,
5224                                         0, 0, 0,
5225                                         btrfs_shared_data_ref_count(eb, sref),
5226                                         0, num_bytes);
5227                         break;
5228                 default:
5229                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5230                                 key.objectid, key.type, num_bytes);
5231                         goto out;
5232                 }
5233                 ptr += btrfs_extent_inline_ref_size(type);
5234         }
5235         WARN_ON(ptr > end);
5236 out:
5237         return 0;
5238 }
5239
5240 static int check_cache_range(struct btrfs_root *root,
5241                              struct btrfs_block_group_cache *cache,
5242                              u64 offset, u64 bytes)
5243 {
5244         struct btrfs_free_space *entry;
5245         u64 *logical;
5246         u64 bytenr;
5247         int stripe_len;
5248         int i, nr, ret;
5249
5250         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5251                 bytenr = btrfs_sb_offset(i);
5252                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5253                                        cache->key.objectid, bytenr, 0,
5254                                        &logical, &nr, &stripe_len);
5255                 if (ret)
5256                         return ret;
5257
5258                 while (nr--) {
5259                         if (logical[nr] + stripe_len <= offset)
5260                                 continue;
5261                         if (offset + bytes <= logical[nr])
5262                                 continue;
5263                         if (logical[nr] == offset) {
5264                                 if (stripe_len >= bytes) {
5265                                         kfree(logical);
5266                                         return 0;
5267                                 }
5268                                 bytes -= stripe_len;
5269                                 offset += stripe_len;
5270                         } else if (logical[nr] < offset) {
5271                                 if (logical[nr] + stripe_len >=
5272                                     offset + bytes) {
5273                                         kfree(logical);
5274                                         return 0;
5275                                 }
5276                                 bytes = (offset + bytes) -
5277                                         (logical[nr] + stripe_len);
5278                                 offset = logical[nr] + stripe_len;
5279                         } else {
5280                                 /*
5281                                  * Could be tricky, the super may land in the
5282                                  * middle of the area we're checking.  First
5283                                  * check the easiest case, it's at the end.
5284                                  */
5285                                 if (logical[nr] + stripe_len >=
5286                                     bytes + offset) {
5287                                         bytes = logical[nr] - offset;
5288                                         continue;
5289                                 }
5290
5291                                 /* Check the left side */
5292                                 ret = check_cache_range(root, cache,
5293                                                         offset,
5294                                                         logical[nr] - offset);
5295                                 if (ret) {
5296                                         kfree(logical);
5297                                         return ret;
5298                                 }
5299
5300                                 /* Now we continue with the right side */
5301                                 bytes = (offset + bytes) -
5302                                         (logical[nr] + stripe_len);
5303                                 offset = logical[nr] + stripe_len;
5304                         }
5305                 }
5306
5307                 kfree(logical);
5308         }
5309
5310         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5311         if (!entry) {
5312                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5313                         offset, offset+bytes);
5314                 return -EINVAL;
5315         }
5316
5317         if (entry->offset != offset) {
5318                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5319                         entry->offset);
5320                 return -EINVAL;
5321         }
5322
5323         if (entry->bytes != bytes) {
5324                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5325                         bytes, entry->bytes, offset);
5326                 return -EINVAL;
5327         }
5328
5329         unlink_free_space(cache->free_space_ctl, entry);
5330         free(entry);
5331         return 0;
5332 }
5333
5334 static int verify_space_cache(struct btrfs_root *root,
5335                               struct btrfs_block_group_cache *cache)
5336 {
5337         struct btrfs_path *path;
5338         struct extent_buffer *leaf;
5339         struct btrfs_key key;
5340         u64 last;
5341         int ret = 0;
5342
5343         path = btrfs_alloc_path();
5344         if (!path)
5345                 return -ENOMEM;
5346
5347         root = root->fs_info->extent_root;
5348
5349         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5350
5351         key.objectid = last;
5352         key.offset = 0;
5353         key.type = BTRFS_EXTENT_ITEM_KEY;
5354
5355         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5356         if (ret < 0)
5357                 goto out;
5358         ret = 0;
5359         while (1) {
5360                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5361                         ret = btrfs_next_leaf(root, path);
5362                         if (ret < 0)
5363                                 goto out;
5364                         if (ret > 0) {
5365                                 ret = 0;
5366                                 break;
5367                         }
5368                 }
5369                 leaf = path->nodes[0];
5370                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5371                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5372                         break;
5373                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5374                     key.type != BTRFS_METADATA_ITEM_KEY) {
5375                         path->slots[0]++;
5376                         continue;
5377                 }
5378
5379                 if (last == key.objectid) {
5380                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5381                                 last = key.objectid + key.offset;
5382                         else
5383                                 last = key.objectid + root->leafsize;
5384                         path->slots[0]++;
5385                         continue;
5386                 }
5387
5388                 ret = check_cache_range(root, cache, last,
5389                                         key.objectid - last);
5390                 if (ret)
5391                         break;
5392                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5393                         last = key.objectid + key.offset;
5394                 else
5395                         last = key.objectid + root->leafsize;
5396                 path->slots[0]++;
5397         }
5398
5399         if (last < cache->key.objectid + cache->key.offset)
5400                 ret = check_cache_range(root, cache, last,
5401                                         cache->key.objectid +
5402                                         cache->key.offset - last);
5403
5404 out:
5405         btrfs_free_path(path);
5406
5407         if (!ret &&
5408             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5409                 fprintf(stderr, "There are still entries left in the space "
5410                         "cache\n");
5411                 ret = -EINVAL;
5412         }
5413
5414         return ret;
5415 }
5416
5417 static int check_space_cache(struct btrfs_root *root)
5418 {
5419         struct btrfs_block_group_cache *cache;
5420         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5421         int ret;
5422         int error = 0;
5423
5424         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5425             btrfs_super_generation(root->fs_info->super_copy) !=
5426             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5427                 printf("cache and super generation don't match, space cache "
5428                        "will be invalidated\n");
5429                 return 0;
5430         }
5431
5432         if (ctx.progress_enabled) {
5433                 ctx.tp = TASK_FREE_SPACE;
5434                 task_start(ctx.info);
5435         }
5436
5437         while (1) {
5438                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5439                 if (!cache)
5440                         break;
5441
5442                 start = cache->key.objectid + cache->key.offset;
5443                 if (!cache->free_space_ctl) {
5444                         if (btrfs_init_free_space_ctl(cache,
5445                                                       root->sectorsize)) {
5446                                 ret = -ENOMEM;
5447                                 break;
5448                         }
5449                 } else {
5450                         btrfs_remove_free_space_cache(cache);
5451                 }
5452
5453                 ret = load_free_space_cache(root->fs_info, cache);
5454                 if (!ret)
5455                         continue;
5456
5457                 ret = verify_space_cache(root, cache);
5458                 if (ret) {
5459                         fprintf(stderr, "cache appears valid but isnt %Lu\n",
5460                                 cache->key.objectid);
5461                         error++;
5462                 }
5463         }
5464
5465         task_stop(ctx.info);
5466
5467         return error ? -EINVAL : 0;
5468 }
5469
5470 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5471                         u64 num_bytes, unsigned long leaf_offset,
5472                         struct extent_buffer *eb) {
5473
5474         u64 offset = 0;
5475         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5476         char *data;
5477         unsigned long csum_offset;
5478         u32 csum;
5479         u32 csum_expected;
5480         u64 read_len;
5481         u64 data_checked = 0;
5482         u64 tmp;
5483         int ret = 0;
5484         int mirror;
5485         int num_copies;
5486
5487         if (num_bytes % root->sectorsize)
5488                 return -EINVAL;
5489
5490         data = malloc(num_bytes);
5491         if (!data)
5492                 return -ENOMEM;
5493
5494         while (offset < num_bytes) {
5495                 mirror = 0;
5496 again:
5497                 read_len = num_bytes - offset;
5498                 /* read as much space once a time */
5499                 ret = read_extent_data(root, data + offset,
5500                                 bytenr + offset, &read_len, mirror);
5501                 if (ret)
5502                         goto out;
5503                 data_checked = 0;
5504                 /* verify every 4k data's checksum */
5505                 while (data_checked < read_len) {
5506                         csum = ~(u32)0;
5507                         tmp = offset + data_checked;
5508
5509                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5510                                                csum, root->sectorsize);
5511                         btrfs_csum_final(csum, (char *)&csum);
5512
5513                         csum_offset = leaf_offset +
5514                                  tmp / root->sectorsize * csum_size;
5515                         read_extent_buffer(eb, (char *)&csum_expected,
5516                                            csum_offset, csum_size);
5517                         /* try another mirror */
5518                         if (csum != csum_expected) {
5519                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5520                                                 mirror, bytenr + tmp,
5521                                                 csum, csum_expected);
5522                                 num_copies = btrfs_num_copies(
5523                                                 &root->fs_info->mapping_tree,
5524                                                 bytenr, num_bytes);
5525                                 if (mirror < num_copies - 1) {
5526                                         mirror += 1;
5527                                         goto again;
5528                                 }
5529                         }
5530                         data_checked += root->sectorsize;
5531                 }
5532                 offset += read_len;
5533         }
5534 out:
5535         free(data);
5536         return ret;
5537 }
5538
5539 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5540                                u64 num_bytes)
5541 {
5542         struct btrfs_path *path;
5543         struct extent_buffer *leaf;
5544         struct btrfs_key key;
5545         int ret;
5546
5547         path = btrfs_alloc_path();
5548         if (!path) {
5549                 fprintf(stderr, "Error allocing path\n");
5550                 return -ENOMEM;
5551         }
5552
5553         key.objectid = bytenr;
5554         key.type = BTRFS_EXTENT_ITEM_KEY;
5555         key.offset = (u64)-1;
5556
5557 again:
5558         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5559                                 0, 0);
5560         if (ret < 0) {
5561                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5562                 btrfs_free_path(path);
5563                 return ret;
5564         } else if (ret) {
5565                 if (path->slots[0] > 0) {
5566                         path->slots[0]--;
5567                 } else {
5568                         ret = btrfs_prev_leaf(root, path);
5569                         if (ret < 0) {
5570                                 goto out;
5571                         } else if (ret > 0) {
5572                                 ret = 0;
5573                                 goto out;
5574                         }
5575                 }
5576         }
5577
5578         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5579
5580         /*
5581          * Block group items come before extent items if they have the same
5582          * bytenr, so walk back one more just in case.  Dear future traveler,
5583          * first congrats on mastering time travel.  Now if it's not too much
5584          * trouble could you go back to 2006 and tell Chris to make the
5585          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5586          * EXTENT_ITEM_KEY please?
5587          */
5588         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5589                 if (path->slots[0] > 0) {
5590                         path->slots[0]--;
5591                 } else {
5592                         ret = btrfs_prev_leaf(root, path);
5593                         if (ret < 0) {
5594                                 goto out;
5595                         } else if (ret > 0) {
5596                                 ret = 0;
5597                                 goto out;
5598                         }
5599                 }
5600                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5601         }
5602
5603         while (num_bytes) {
5604                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5605                         ret = btrfs_next_leaf(root, path);
5606                         if (ret < 0) {
5607                                 fprintf(stderr, "Error going to next leaf "
5608                                         "%d\n", ret);
5609                                 btrfs_free_path(path);
5610                                 return ret;
5611                         } else if (ret) {
5612                                 break;
5613                         }
5614                 }
5615                 leaf = path->nodes[0];
5616                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5617                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5618                         path->slots[0]++;
5619                         continue;
5620                 }
5621                 if (key.objectid + key.offset < bytenr) {
5622                         path->slots[0]++;
5623                         continue;
5624                 }
5625                 if (key.objectid > bytenr + num_bytes)
5626                         break;
5627
5628                 if (key.objectid == bytenr) {
5629                         if (key.offset >= num_bytes) {
5630                                 num_bytes = 0;
5631                                 break;
5632                         }
5633                         num_bytes -= key.offset;
5634                         bytenr += key.offset;
5635                 } else if (key.objectid < bytenr) {
5636                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5637                                 num_bytes = 0;
5638                                 break;
5639                         }
5640                         num_bytes = (bytenr + num_bytes) -
5641                                 (key.objectid + key.offset);
5642                         bytenr = key.objectid + key.offset;
5643                 } else {
5644                         if (key.objectid + key.offset < bytenr + num_bytes) {
5645                                 u64 new_start = key.objectid + key.offset;
5646                                 u64 new_bytes = bytenr + num_bytes - new_start;
5647
5648                                 /*
5649                                  * Weird case, the extent is in the middle of
5650                                  * our range, we'll have to search one side
5651                                  * and then the other.  Not sure if this happens
5652                                  * in real life, but no harm in coding it up
5653                                  * anyway just in case.
5654                                  */
5655                                 btrfs_release_path(path);
5656                                 ret = check_extent_exists(root, new_start,
5657                                                           new_bytes);
5658                                 if (ret) {
5659                                         fprintf(stderr, "Right section didn't "
5660                                                 "have a record\n");
5661                                         break;
5662                                 }
5663                                 num_bytes = key.objectid - bytenr;
5664                                 goto again;
5665                         }
5666                         num_bytes = key.objectid - bytenr;
5667                 }
5668                 path->slots[0]++;
5669         }
5670         ret = 0;
5671
5672 out:
5673         if (num_bytes && !ret) {
5674                 fprintf(stderr, "There are no extents for csum range "
5675                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5676                 ret = 1;
5677         }
5678
5679         btrfs_free_path(path);
5680         return ret;
5681 }
5682
5683 static int check_csums(struct btrfs_root *root)
5684 {
5685         struct btrfs_path *path;
5686         struct extent_buffer *leaf;
5687         struct btrfs_key key;
5688         u64 offset = 0, num_bytes = 0;
5689         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5690         int errors = 0;
5691         int ret;
5692         u64 data_len;
5693         unsigned long leaf_offset;
5694
5695         root = root->fs_info->csum_root;
5696         if (!extent_buffer_uptodate(root->node)) {
5697                 fprintf(stderr, "No valid csum tree found\n");
5698                 return -ENOENT;
5699         }
5700
5701         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5702         key.type = BTRFS_EXTENT_CSUM_KEY;
5703         key.offset = 0;
5704
5705         path = btrfs_alloc_path();
5706         if (!path)
5707                 return -ENOMEM;
5708
5709         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5710         if (ret < 0) {
5711                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5712                 btrfs_free_path(path);
5713                 return ret;
5714         }
5715
5716         if (ret > 0 && path->slots[0])
5717                 path->slots[0]--;
5718         ret = 0;
5719
5720         while (1) {
5721                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5722                         ret = btrfs_next_leaf(root, path);
5723                         if (ret < 0) {
5724                                 fprintf(stderr, "Error going to next leaf "
5725                                         "%d\n", ret);
5726                                 break;
5727                         }
5728                         if (ret)
5729                                 break;
5730                 }
5731                 leaf = path->nodes[0];
5732
5733                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5734                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5735                         path->slots[0]++;
5736                         continue;
5737                 }
5738
5739                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5740                               csum_size) * root->sectorsize;
5741                 if (!check_data_csum)
5742                         goto skip_csum_check;
5743                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5744                 ret = check_extent_csums(root, key.offset, data_len,
5745                                          leaf_offset, leaf);
5746                 if (ret)
5747                         break;
5748 skip_csum_check:
5749                 if (!num_bytes) {
5750                         offset = key.offset;
5751                 } else if (key.offset != offset + num_bytes) {
5752                         ret = check_extent_exists(root, offset, num_bytes);
5753                         if (ret) {
5754                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5755                                         "there is no extent record\n",
5756                                         offset, offset+num_bytes);
5757                                 errors++;
5758                         }
5759                         offset = key.offset;
5760                         num_bytes = 0;
5761                 }
5762                 num_bytes += data_len;
5763                 path->slots[0]++;
5764         }
5765
5766         btrfs_free_path(path);
5767         return errors;
5768 }
5769
5770 static int is_dropped_key(struct btrfs_key *key,
5771                           struct btrfs_key *drop_key) {
5772         if (key->objectid < drop_key->objectid)
5773                 return 1;
5774         else if (key->objectid == drop_key->objectid) {
5775                 if (key->type < drop_key->type)
5776                         return 1;
5777                 else if (key->type == drop_key->type) {
5778                         if (key->offset < drop_key->offset)
5779                                 return 1;
5780                 }
5781         }
5782         return 0;
5783 }
5784
5785 /*
5786  * Here are the rules for FULL_BACKREF.
5787  *
5788  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5789  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5790  *      FULL_BACKREF set.
5791  * 3) We cow'ed the block walking down a reloc tree.  This is impossible to tell
5792  *    if it happened after the relocation occurred since we'll have dropped the
5793  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5794  *    have no real way to know for sure.
5795  *
5796  * We process the blocks one root at a time, and we start from the lowest root
5797  * objectid and go to the highest.  So we can just lookup the owner backref for
5798  * the record and if we don't find it then we know it doesn't exist and we have
5799  * a FULL BACKREF.
5800  *
5801  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5802  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5803  * be set or not and then we can check later once we've gathered all the refs.
5804  */
5805 static int calc_extent_flag(struct btrfs_root *root,
5806                            struct cache_tree *extent_cache,
5807                            struct extent_buffer *buf,
5808                            struct root_item_record *ri,
5809                            u64 *flags)
5810 {
5811         struct extent_record *rec;
5812         struct cache_extent *cache;
5813         struct tree_backref *tback;
5814         u64 owner = 0;
5815
5816         cache = lookup_cache_extent(extent_cache, buf->start, 1);
5817         /* we have added this extent before */
5818         BUG_ON(!cache);
5819         rec = container_of(cache, struct extent_record, cache);
5820
5821         /*
5822          * Except file/reloc tree, we can not have
5823          * FULL BACKREF MODE
5824          */
5825         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5826                 goto normal;
5827         /*
5828          * root node
5829          */
5830         if (buf->start == ri->bytenr)
5831                 goto normal;
5832
5833         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5834                 goto full_backref;
5835
5836         owner = btrfs_header_owner(buf);
5837         if (owner == ri->objectid)
5838                 goto normal;
5839
5840         tback = find_tree_backref(rec, 0, owner);
5841         if (!tback)
5842                 goto full_backref;
5843 normal:
5844         *flags = 0;
5845         if (rec->flag_block_full_backref != -1 &&
5846             rec->flag_block_full_backref != 0)
5847                 rec->bad_full_backref = 1;
5848         return 0;
5849 full_backref:
5850         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5851         if (rec->flag_block_full_backref != -1 &&
5852             rec->flag_block_full_backref != 1)
5853                 rec->bad_full_backref = 1;
5854         return 0;
5855 }
5856
5857 static int run_next_block(struct btrfs_root *root,
5858                           struct block_info *bits,
5859                           int bits_nr,
5860                           u64 *last,
5861                           struct cache_tree *pending,
5862                           struct cache_tree *seen,
5863                           struct cache_tree *reada,
5864                           struct cache_tree *nodes,
5865                           struct cache_tree *extent_cache,
5866                           struct cache_tree *chunk_cache,
5867                           struct rb_root *dev_cache,
5868                           struct block_group_tree *block_group_cache,
5869                           struct device_extent_tree *dev_extent_cache,
5870                           struct root_item_record *ri)
5871 {
5872         struct extent_buffer *buf;
5873         struct extent_record *rec = NULL;
5874         u64 bytenr;
5875         u32 size;
5876         u64 parent;
5877         u64 owner;
5878         u64 flags;
5879         u64 ptr;
5880         u64 gen = 0;
5881         int ret = 0;
5882         int i;
5883         int nritems;
5884         struct btrfs_key key;
5885         struct cache_extent *cache;
5886         int reada_bits;
5887
5888         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5889                                     bits_nr, &reada_bits);
5890         if (nritems == 0)
5891                 return 1;
5892
5893         if (!reada_bits) {
5894                 for(i = 0; i < nritems; i++) {
5895                         ret = add_cache_extent(reada, bits[i].start,
5896                                                bits[i].size);
5897                         if (ret == -EEXIST)
5898                                 continue;
5899
5900                         /* fixme, get the parent transid */
5901                         readahead_tree_block(root, bits[i].start,
5902                                              bits[i].size, 0);
5903                 }
5904         }
5905         *last = bits[0].start;
5906         bytenr = bits[0].start;
5907         size = bits[0].size;
5908
5909         cache = lookup_cache_extent(pending, bytenr, size);
5910         if (cache) {
5911                 remove_cache_extent(pending, cache);
5912                 free(cache);
5913         }
5914         cache = lookup_cache_extent(reada, bytenr, size);
5915         if (cache) {
5916                 remove_cache_extent(reada, cache);
5917                 free(cache);
5918         }
5919         cache = lookup_cache_extent(nodes, bytenr, size);
5920         if (cache) {
5921                 remove_cache_extent(nodes, cache);
5922                 free(cache);
5923         }
5924         cache = lookup_cache_extent(extent_cache, bytenr, size);
5925         if (cache) {
5926                 rec = container_of(cache, struct extent_record, cache);
5927                 gen = rec->parent_generation;
5928         }
5929
5930         /* fixme, get the real parent transid */
5931         buf = read_tree_block(root, bytenr, size, gen);
5932         if (!extent_buffer_uptodate(buf)) {
5933                 record_bad_block_io(root->fs_info,
5934                                     extent_cache, bytenr, size);
5935                 goto out;
5936         }
5937
5938         nritems = btrfs_header_nritems(buf);
5939
5940         flags = 0;
5941         if (!init_extent_tree) {
5942                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5943                                        btrfs_header_level(buf), 1, NULL,
5944                                        &flags);
5945                 if (ret < 0) {
5946                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5947                         if (ret < 0) {
5948                                 fprintf(stderr, "Couldn't calc extent flags\n");
5949                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5950                         }
5951                 }
5952         } else {
5953                 flags = 0;
5954                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5955                 if (ret < 0) {
5956                         fprintf(stderr, "Couldn't calc extent flags\n");
5957                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5958                 }
5959         }
5960
5961         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5962                 if (ri != NULL &&
5963                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5964                     ri->objectid == btrfs_header_owner(buf)) {
5965                         /*
5966                          * Ok we got to this block from it's original owner and
5967                          * we have FULL_BACKREF set.  Relocation can leave
5968                          * converted blocks over so this is altogether possible,
5969                          * however it's not possible if the generation > the
5970                          * last snapshot, so check for this case.
5971                          */
5972                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5973                             btrfs_header_generation(buf) > ri->last_snapshot) {
5974                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5975                                 rec->bad_full_backref = 1;
5976                         }
5977                 }
5978         } else {
5979                 if (ri != NULL &&
5980                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5981                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5982                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5983                         rec->bad_full_backref = 1;
5984                 }
5985         }
5986
5987         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5988                 rec->flag_block_full_backref = 1;
5989                 parent = bytenr;
5990                 owner = 0;
5991         } else {
5992                 rec->flag_block_full_backref = 0;
5993                 parent = 0;
5994                 owner = btrfs_header_owner(buf);
5995         }
5996
5997         ret = check_block(root, extent_cache, buf, flags);
5998         if (ret)
5999                 goto out;
6000
6001         if (btrfs_is_leaf(buf)) {
6002                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6003                 for (i = 0; i < nritems; i++) {
6004                         struct btrfs_file_extent_item *fi;
6005                         btrfs_item_key_to_cpu(buf, &key, i);
6006                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6007                                 process_extent_item(root, extent_cache, buf,
6008                                                     i);
6009                                 continue;
6010                         }
6011                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6012                                 process_extent_item(root, extent_cache, buf,
6013                                                     i);
6014                                 continue;
6015                         }
6016                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6017                                 total_csum_bytes +=
6018                                         btrfs_item_size_nr(buf, i);
6019                                 continue;
6020                         }
6021                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6022                                 process_chunk_item(chunk_cache, &key, buf, i);
6023                                 continue;
6024                         }
6025                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6026                                 process_device_item(dev_cache, &key, buf, i);
6027                                 continue;
6028                         }
6029                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6030                                 process_block_group_item(block_group_cache,
6031                                         &key, buf, i);
6032                                 continue;
6033                         }
6034                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6035                                 process_device_extent_item(dev_extent_cache,
6036                                         &key, buf, i);
6037                                 continue;
6038
6039                         }
6040                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6041 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6042                                 process_extent_ref_v0(extent_cache, buf, i);
6043 #else
6044                                 BUG();
6045 #endif
6046                                 continue;
6047                         }
6048
6049                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6050                                 add_tree_backref(extent_cache, key.objectid, 0,
6051                                                  key.offset, 0);
6052                                 continue;
6053                         }
6054                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6055                                 add_tree_backref(extent_cache, key.objectid,
6056                                                  key.offset, 0, 0);
6057                                 continue;
6058                         }
6059                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6060                                 struct btrfs_extent_data_ref *ref;
6061                                 ref = btrfs_item_ptr(buf, i,
6062                                                 struct btrfs_extent_data_ref);
6063                                 add_data_backref(extent_cache,
6064                                         key.objectid, 0,
6065                                         btrfs_extent_data_ref_root(buf, ref),
6066                                         btrfs_extent_data_ref_objectid(buf,
6067                                                                        ref),
6068                                         btrfs_extent_data_ref_offset(buf, ref),
6069                                         btrfs_extent_data_ref_count(buf, ref),
6070                                         0, root->sectorsize);
6071                                 continue;
6072                         }
6073                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6074                                 struct btrfs_shared_data_ref *ref;
6075                                 ref = btrfs_item_ptr(buf, i,
6076                                                 struct btrfs_shared_data_ref);
6077                                 add_data_backref(extent_cache,
6078                                         key.objectid, key.offset, 0, 0, 0,
6079                                         btrfs_shared_data_ref_count(buf, ref),
6080                                         0, root->sectorsize);
6081                                 continue;
6082                         }
6083                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6084                                 struct bad_item *bad;
6085
6086                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6087                                         continue;
6088                                 if (!owner)
6089                                         continue;
6090                                 bad = malloc(sizeof(struct bad_item));
6091                                 if (!bad)
6092                                         continue;
6093                                 INIT_LIST_HEAD(&bad->list);
6094                                 memcpy(&bad->key, &key,
6095                                        sizeof(struct btrfs_key));
6096                                 bad->root_id = owner;
6097                                 list_add_tail(&bad->list, &delete_items);
6098                                 continue;
6099                         }
6100                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6101                                 continue;
6102                         fi = btrfs_item_ptr(buf, i,
6103                                             struct btrfs_file_extent_item);
6104                         if (btrfs_file_extent_type(buf, fi) ==
6105                             BTRFS_FILE_EXTENT_INLINE)
6106                                 continue;
6107                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6108                                 continue;
6109
6110                         data_bytes_allocated +=
6111                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6112                         if (data_bytes_allocated < root->sectorsize) {
6113                                 abort();
6114                         }
6115                         data_bytes_referenced +=
6116                                 btrfs_file_extent_num_bytes(buf, fi);
6117                         add_data_backref(extent_cache,
6118                                 btrfs_file_extent_disk_bytenr(buf, fi),
6119                                 parent, owner, key.objectid, key.offset -
6120                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6121                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6122                 }
6123         } else {
6124                 int level;
6125                 struct btrfs_key first_key;
6126
6127                 first_key.objectid = 0;
6128
6129                 if (nritems > 0)
6130                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6131                 level = btrfs_header_level(buf);
6132                 for (i = 0; i < nritems; i++) {
6133                         ptr = btrfs_node_blockptr(buf, i);
6134                         size = btrfs_level_size(root, level - 1);
6135                         btrfs_node_key_to_cpu(buf, &key, i);
6136                         if (ri != NULL) {
6137                                 if ((level == ri->drop_level)
6138                                     && is_dropped_key(&key, &ri->drop_key)) {
6139                                         continue;
6140                                 }
6141                         }
6142                         ret = add_extent_rec(extent_cache, &key,
6143                                              btrfs_node_ptr_generation(buf, i),
6144                                              ptr, size, 0, 0, 1, 0, 1, 0,
6145                                              size);
6146                         BUG_ON(ret);
6147
6148                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6149
6150                         if (level > 1) {
6151                                 add_pending(nodes, seen, ptr, size);
6152                         } else {
6153                                 add_pending(pending, seen, ptr, size);
6154                         }
6155                 }
6156                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6157                                       nritems) * sizeof(struct btrfs_key_ptr);
6158         }
6159         total_btree_bytes += buf->len;
6160         if (fs_root_objectid(btrfs_header_owner(buf)))
6161                 total_fs_tree_bytes += buf->len;
6162         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6163                 total_extent_tree_bytes += buf->len;
6164         if (!found_old_backref &&
6165             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6166             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6167             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6168                 found_old_backref = 1;
6169 out:
6170         free_extent_buffer(buf);
6171         return ret;
6172 }
6173
6174 static int add_root_to_pending(struct extent_buffer *buf,
6175                                struct cache_tree *extent_cache,
6176                                struct cache_tree *pending,
6177                                struct cache_tree *seen,
6178                                struct cache_tree *nodes,
6179                                u64 objectid)
6180 {
6181         if (btrfs_header_level(buf) > 0)
6182                 add_pending(nodes, seen, buf->start, buf->len);
6183         else
6184                 add_pending(pending, seen, buf->start, buf->len);
6185         add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6186                        0, 1, 1, 0, 1, 0, buf->len);
6187
6188         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6189             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6190                 add_tree_backref(extent_cache, buf->start, buf->start,
6191                                  0, 1);
6192         else
6193                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6194         return 0;
6195 }
6196
6197 /* as we fix the tree, we might be deleting blocks that
6198  * we're tracking for repair.  This hook makes sure we
6199  * remove any backrefs for blocks as we are fixing them.
6200  */
6201 static int free_extent_hook(struct btrfs_trans_handle *trans,
6202                             struct btrfs_root *root,
6203                             u64 bytenr, u64 num_bytes, u64 parent,
6204                             u64 root_objectid, u64 owner, u64 offset,
6205                             int refs_to_drop)
6206 {
6207         struct extent_record *rec;
6208         struct cache_extent *cache;
6209         int is_data;
6210         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6211
6212         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6213         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6214         if (!cache)
6215                 return 0;
6216
6217         rec = container_of(cache, struct extent_record, cache);
6218         if (is_data) {
6219                 struct data_backref *back;
6220                 back = find_data_backref(rec, parent, root_objectid, owner,
6221                                          offset, 1, bytenr, num_bytes);
6222                 if (!back)
6223                         goto out;
6224                 if (back->node.found_ref) {
6225                         back->found_ref -= refs_to_drop;
6226                         if (rec->refs)
6227                                 rec->refs -= refs_to_drop;
6228                 }
6229                 if (back->node.found_extent_tree) {
6230                         back->num_refs -= refs_to_drop;
6231                         if (rec->extent_item_refs)
6232                                 rec->extent_item_refs -= refs_to_drop;
6233                 }
6234                 if (back->found_ref == 0)
6235                         back->node.found_ref = 0;
6236                 if (back->num_refs == 0)
6237                         back->node.found_extent_tree = 0;
6238
6239                 if (!back->node.found_extent_tree && back->node.found_ref) {
6240                         list_del(&back->node.list);
6241                         free(back);
6242                 }
6243         } else {
6244                 struct tree_backref *back;
6245                 back = find_tree_backref(rec, parent, root_objectid);
6246                 if (!back)
6247                         goto out;
6248                 if (back->node.found_ref) {
6249                         if (rec->refs)
6250                                 rec->refs--;
6251                         back->node.found_ref = 0;
6252                 }
6253                 if (back->node.found_extent_tree) {
6254                         if (rec->extent_item_refs)
6255                                 rec->extent_item_refs--;
6256                         back->node.found_extent_tree = 0;
6257                 }
6258                 if (!back->node.found_extent_tree && back->node.found_ref) {
6259                         list_del(&back->node.list);
6260                         free(back);
6261                 }
6262         }
6263         maybe_free_extent_rec(extent_cache, rec);
6264 out:
6265         return 0;
6266 }
6267
6268 static int delete_extent_records(struct btrfs_trans_handle *trans,
6269                                  struct btrfs_root *root,
6270                                  struct btrfs_path *path,
6271                                  u64 bytenr, u64 new_len)
6272 {
6273         struct btrfs_key key;
6274         struct btrfs_key found_key;
6275         struct extent_buffer *leaf;
6276         int ret;
6277         int slot;
6278
6279
6280         key.objectid = bytenr;
6281         key.type = (u8)-1;
6282         key.offset = (u64)-1;
6283
6284         while(1) {
6285                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6286                                         &key, path, 0, 1);
6287                 if (ret < 0)
6288                         break;
6289
6290                 if (ret > 0) {
6291                         ret = 0;
6292                         if (path->slots[0] == 0)
6293                                 break;
6294                         path->slots[0]--;
6295                 }
6296                 ret = 0;
6297
6298                 leaf = path->nodes[0];
6299                 slot = path->slots[0];
6300
6301                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6302                 if (found_key.objectid != bytenr)
6303                         break;
6304
6305                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6306                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6307                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6308                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6309                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6310                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6311                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6312                         btrfs_release_path(path);
6313                         if (found_key.type == 0) {
6314                                 if (found_key.offset == 0)
6315                                         break;
6316                                 key.offset = found_key.offset - 1;
6317                                 key.type = found_key.type;
6318                         }
6319                         key.type = found_key.type - 1;
6320                         key.offset = (u64)-1;
6321                         continue;
6322                 }
6323
6324                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6325                         found_key.objectid, found_key.type, found_key.offset);
6326
6327                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6328                 if (ret)
6329                         break;
6330                 btrfs_release_path(path);
6331
6332                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6333                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6334                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6335                                 found_key.offset : root->leafsize;
6336
6337                         ret = btrfs_update_block_group(trans, root, bytenr,
6338                                                        bytes, 0, 0);
6339                         if (ret)
6340                                 break;
6341                 }
6342         }
6343
6344         btrfs_release_path(path);
6345         return ret;
6346 }
6347
6348 /*
6349  * for a single backref, this will allocate a new extent
6350  * and add the backref to it.
6351  */
6352 static int record_extent(struct btrfs_trans_handle *trans,
6353                          struct btrfs_fs_info *info,
6354                          struct btrfs_path *path,
6355                          struct extent_record *rec,
6356                          struct extent_backref *back,
6357                          int allocated, u64 flags)
6358 {
6359         int ret;
6360         struct btrfs_root *extent_root = info->extent_root;
6361         struct extent_buffer *leaf;
6362         struct btrfs_key ins_key;
6363         struct btrfs_extent_item *ei;
6364         struct tree_backref *tback;
6365         struct data_backref *dback;
6366         struct btrfs_tree_block_info *bi;
6367
6368         if (!back->is_data)
6369                 rec->max_size = max_t(u64, rec->max_size,
6370                                     info->extent_root->leafsize);
6371
6372         if (!allocated) {
6373                 u32 item_size = sizeof(*ei);
6374
6375                 if (!back->is_data)
6376                         item_size += sizeof(*bi);
6377
6378                 ins_key.objectid = rec->start;
6379                 ins_key.offset = rec->max_size;
6380                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6381
6382                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6383                                         &ins_key, item_size);
6384                 if (ret)
6385                         goto fail;
6386
6387                 leaf = path->nodes[0];
6388                 ei = btrfs_item_ptr(leaf, path->slots[0],
6389                                     struct btrfs_extent_item);
6390
6391                 btrfs_set_extent_refs(leaf, ei, 0);
6392                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6393
6394                 if (back->is_data) {
6395                         btrfs_set_extent_flags(leaf, ei,
6396                                                BTRFS_EXTENT_FLAG_DATA);
6397                 } else {
6398                         struct btrfs_disk_key copy_key;;
6399
6400                         tback = (struct tree_backref *)back;
6401                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6402                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6403                                              sizeof(*bi));
6404
6405                         btrfs_set_disk_key_objectid(&copy_key,
6406                                                     rec->info_objectid);
6407                         btrfs_set_disk_key_type(&copy_key, 0);
6408                         btrfs_set_disk_key_offset(&copy_key, 0);
6409
6410                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6411                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6412
6413                         btrfs_set_extent_flags(leaf, ei,
6414                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6415                 }
6416
6417                 btrfs_mark_buffer_dirty(leaf);
6418                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6419                                                rec->max_size, 1, 0);
6420                 if (ret)
6421                         goto fail;
6422                 btrfs_release_path(path);
6423         }
6424
6425         if (back->is_data) {
6426                 u64 parent;
6427                 int i;
6428
6429                 dback = (struct data_backref *)back;
6430                 if (back->full_backref)
6431                         parent = dback->parent;
6432                 else
6433                         parent = 0;
6434
6435                 for (i = 0; i < dback->found_ref; i++) {
6436                         /* if parent != 0, we're doing a full backref
6437                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6438                          * just makes the backref allocator create a data
6439                          * backref
6440                          */
6441                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6442                                                    rec->start, rec->max_size,
6443                                                    parent,
6444                                                    dback->root,
6445                                                    parent ?
6446                                                    BTRFS_FIRST_FREE_OBJECTID :
6447                                                    dback->owner,
6448                                                    dback->offset);
6449                         if (ret)
6450                                 break;
6451                 }
6452                 fprintf(stderr, "adding new data backref"
6453                                 " on %llu %s %llu owner %llu"
6454                                 " offset %llu found %d\n",
6455                                 (unsigned long long)rec->start,
6456                                 back->full_backref ?
6457                                 "parent" : "root",
6458                                 back->full_backref ?
6459                                 (unsigned long long)parent :
6460                                 (unsigned long long)dback->root,
6461                                 (unsigned long long)dback->owner,
6462                                 (unsigned long long)dback->offset,
6463                                 dback->found_ref);
6464         } else {
6465                 u64 parent;
6466
6467                 tback = (struct tree_backref *)back;
6468                 if (back->full_backref)
6469                         parent = tback->parent;
6470                 else
6471                         parent = 0;
6472
6473                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6474                                            rec->start, rec->max_size,
6475                                            parent, tback->root, 0, 0);
6476                 fprintf(stderr, "adding new tree backref on "
6477                         "start %llu len %llu parent %llu root %llu\n",
6478                         rec->start, rec->max_size, parent, tback->root);
6479         }
6480 fail:
6481         btrfs_release_path(path);
6482         return ret;
6483 }
6484
/*
 * One distinct (bytenr, bytes) pair reported by the data backrefs of an
 * extent record.  verify_backrefs() builds a list of these to decide which
 * pair the backrefs agree on.
 */
struct extent_entry {
        /* disk bytenr claimed by the backrefs grouped in this entry */
        u64 bytenr;
        /* byte length claimed by the backrefs grouped in this entry */
        u64 bytes;
        /* number of backrefs that reported this bytenr/bytes pair */
        int count;
        /* how many of those backrefs were flagged broken */
        int broken;
        /* link in the list of candidate entries */
        struct list_head list;
};
6492
6493 static struct extent_entry *find_entry(struct list_head *entries,
6494                                        u64 bytenr, u64 bytes)
6495 {
6496         struct extent_entry *entry = NULL;
6497
6498         list_for_each_entry(entry, entries, list) {
6499                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6500                         return entry;
6501         }
6502
6503         return NULL;
6504 }
6505
6506 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6507 {
6508         struct extent_entry *entry, *best = NULL, *prev = NULL;
6509
6510         list_for_each_entry(entry, entries, list) {
6511                 if (!prev) {
6512                         prev = entry;
6513                         continue;
6514                 }
6515
6516                 /*
6517                  * If there are as many broken entries as entries then we know
6518                  * not to trust this particular entry.
6519                  */
6520                 if (entry->broken == entry->count)
6521                         continue;
6522
6523                 /*
6524                  * If our current entry == best then we can't be sure our best
6525                  * is really the best, so we need to keep searching.
6526                  */
6527                 if (best && best->count == entry->count) {
6528                         prev = entry;
6529                         best = NULL;
6530                         continue;
6531                 }
6532
6533                 /* Prev == entry, not good enough, have to keep searching */
6534                 if (!prev->broken && prev->count == entry->count)
6535                         continue;
6536
6537                 if (!best)
6538                         best = (prev->count > entry->count) ? prev : entry;
6539                 else if (best->count < entry->count)
6540                         best = entry;
6541                 prev = entry;
6542         }
6543
6544         return best;
6545 }
6546
6547 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6548                       struct data_backref *dback, struct extent_entry *entry)
6549 {
6550         struct btrfs_trans_handle *trans;
6551         struct btrfs_root *root;
6552         struct btrfs_file_extent_item *fi;
6553         struct extent_buffer *leaf;
6554         struct btrfs_key key;
6555         u64 bytenr, bytes;
6556         int ret, err;
6557
6558         key.objectid = dback->root;
6559         key.type = BTRFS_ROOT_ITEM_KEY;
6560         key.offset = (u64)-1;
6561         root = btrfs_read_fs_root(info, &key);
6562         if (IS_ERR(root)) {
6563                 fprintf(stderr, "Couldn't find root for our ref\n");
6564                 return -EINVAL;
6565         }
6566
6567         /*
6568          * The backref points to the original offset of the extent if it was
6569          * split, so we need to search down to the offset we have and then walk
6570          * forward until we find the backref we're looking for.
6571          */
6572         key.objectid = dback->owner;
6573         key.type = BTRFS_EXTENT_DATA_KEY;
6574         key.offset = dback->offset;
6575         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6576         if (ret < 0) {
6577                 fprintf(stderr, "Error looking up ref %d\n", ret);
6578                 return ret;
6579         }
6580
6581         while (1) {
6582                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6583                         ret = btrfs_next_leaf(root, path);
6584                         if (ret) {
6585                                 fprintf(stderr, "Couldn't find our ref, next\n");
6586                                 return -EINVAL;
6587                         }
6588                 }
6589                 leaf = path->nodes[0];
6590                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6591                 if (key.objectid != dback->owner ||
6592                     key.type != BTRFS_EXTENT_DATA_KEY) {
6593                         fprintf(stderr, "Couldn't find our ref, search\n");
6594                         return -EINVAL;
6595                 }
6596                 fi = btrfs_item_ptr(leaf, path->slots[0],
6597                                     struct btrfs_file_extent_item);
6598                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6599                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6600
6601                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6602                         break;
6603                 path->slots[0]++;
6604         }
6605
6606         btrfs_release_path(path);
6607
6608         trans = btrfs_start_transaction(root, 1);
6609         if (IS_ERR(trans))
6610                 return PTR_ERR(trans);
6611
6612         /*
6613          * Ok we have the key of the file extent we want to fix, now we can cow
6614          * down to the thing and fix it.
6615          */
6616         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6617         if (ret < 0) {
6618                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6619                         key.objectid, key.type, key.offset, ret);
6620                 goto out;
6621         }
6622         if (ret > 0) {
6623                 fprintf(stderr, "Well that's odd, we just found this key "
6624                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6625                         key.offset);
6626                 ret = -EINVAL;
6627                 goto out;
6628         }
6629         leaf = path->nodes[0];
6630         fi = btrfs_item_ptr(leaf, path->slots[0],
6631                             struct btrfs_file_extent_item);
6632
6633         if (btrfs_file_extent_compression(leaf, fi) &&
6634             dback->disk_bytenr != entry->bytenr) {
6635                 fprintf(stderr, "Ref doesn't match the record start and is "
6636                         "compressed, please take a btrfs-image of this file "
6637                         "system and send it to a btrfs developer so they can "
6638                         "complete this functionality for bytenr %Lu\n",
6639                         dback->disk_bytenr);
6640                 ret = -EINVAL;
6641                 goto out;
6642         }
6643
6644         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6645                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6646         } else if (dback->disk_bytenr > entry->bytenr) {
6647                 u64 off_diff, offset;
6648
6649                 off_diff = dback->disk_bytenr - entry->bytenr;
6650                 offset = btrfs_file_extent_offset(leaf, fi);
6651                 if (dback->disk_bytenr + offset +
6652                     btrfs_file_extent_num_bytes(leaf, fi) >
6653                     entry->bytenr + entry->bytes) {
6654                         fprintf(stderr, "Ref is past the entry end, please "
6655                                 "take a btrfs-image of this file system and "
6656                                 "send it to a btrfs developer, ref %Lu\n",
6657                                 dback->disk_bytenr);
6658                         ret = -EINVAL;
6659                         goto out;
6660                 }
6661                 offset += off_diff;
6662                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6663                 btrfs_set_file_extent_offset(leaf, fi, offset);
6664         } else if (dback->disk_bytenr < entry->bytenr) {
6665                 u64 offset;
6666
6667                 offset = btrfs_file_extent_offset(leaf, fi);
6668                 if (dback->disk_bytenr + offset < entry->bytenr) {
6669                         fprintf(stderr, "Ref is before the entry start, please"
6670                                 " take a btrfs-image of this file system and "
6671                                 "send it to a btrfs developer, ref %Lu\n",
6672                                 dback->disk_bytenr);
6673                         ret = -EINVAL;
6674                         goto out;
6675                 }
6676
6677                 offset += dback->disk_bytenr;
6678                 offset -= entry->bytenr;
6679                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6680                 btrfs_set_file_extent_offset(leaf, fi, offset);
6681         }
6682
6683         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6684
6685         /*
6686          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6687          * only do this if we aren't using compression, otherwise it's a
6688          * trickier case.
6689          */
6690         if (!btrfs_file_extent_compression(leaf, fi))
6691                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6692         else
6693                 printf("ram bytes may be wrong?\n");
6694         btrfs_mark_buffer_dirty(leaf);
6695 out:
6696         err = btrfs_commit_transaction(trans, root);
6697         btrfs_release_path(path);
6698         return ret ? ret : err;
6699 }
6700
6701 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6702                            struct extent_record *rec)
6703 {
6704         struct extent_backref *back;
6705         struct data_backref *dback;
6706         struct extent_entry *entry, *best = NULL;
6707         LIST_HEAD(entries);
6708         int nr_entries = 0;
6709         int broken_entries = 0;
6710         int ret = 0;
6711         short mismatch = 0;
6712
6713         /*
6714          * Metadata is easy and the backrefs should always agree on bytenr and
6715          * size, if not we've got bigger issues.
6716          */
6717         if (rec->metadata)
6718                 return 0;
6719
6720         list_for_each_entry(back, &rec->backrefs, list) {
6721                 if (back->full_backref || !back->is_data)
6722                         continue;
6723
6724                 dback = (struct data_backref *)back;
6725
6726                 /*
6727                  * We only pay attention to backrefs that we found a real
6728                  * backref for.
6729                  */
6730                 if (dback->found_ref == 0)
6731                         continue;
6732
6733                 /*
6734                  * For now we only catch when the bytes don't match, not the
6735                  * bytenr.  We can easily do this at the same time, but I want
6736                  * to have a fs image to test on before we just add repair
6737                  * functionality willy-nilly so we know we won't screw up the
6738                  * repair.
6739                  */
6740
6741                 entry = find_entry(&entries, dback->disk_bytenr,
6742                                    dback->bytes);
6743                 if (!entry) {
6744                         entry = malloc(sizeof(struct extent_entry));
6745                         if (!entry) {
6746                                 ret = -ENOMEM;
6747                                 goto out;
6748                         }
6749                         memset(entry, 0, sizeof(*entry));
6750                         entry->bytenr = dback->disk_bytenr;
6751                         entry->bytes = dback->bytes;
6752                         list_add_tail(&entry->list, &entries);
6753                         nr_entries++;
6754                 }
6755
6756                 /*
                 * If we only have one entry we may think the entries agree when
6758                  * in reality they don't so we have to do some extra checking.
6759                  */
6760                 if (dback->disk_bytenr != rec->start ||
6761                     dback->bytes != rec->nr || back->broken)
6762                         mismatch = 1;
6763
6764                 if (back->broken) {
6765                         entry->broken++;
6766                         broken_entries++;
6767                 }
6768
6769                 entry->count++;
6770         }
6771
6772         /* Yay all the backrefs agree, carry on good sir */
6773         if (nr_entries <= 1 && !mismatch)
6774                 goto out;
6775
6776         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6777                 "%Lu\n", rec->start);
6778
6779         /*
6780          * First we want to see if the backrefs can agree amongst themselves who
6781          * is right, so figure out which one of the entries has the highest
6782          * count.
6783          */
6784         best = find_most_right_entry(&entries);
6785
6786         /*
6787          * Ok so we may have an even split between what the backrefs think, so
6788          * this is where we use the extent ref to see what it thinks.
6789          */
6790         if (!best) {
6791                 entry = find_entry(&entries, rec->start, rec->nr);
6792                 if (!entry && (!broken_entries || !rec->found_rec)) {
6793                         fprintf(stderr, "Backrefs don't agree with each other "
6794                                 "and extent record doesn't agree with anybody,"
6795                                 " so we can't fix bytenr %Lu bytes %Lu\n",
6796                                 rec->start, rec->nr);
6797                         ret = -EINVAL;
6798                         goto out;
6799                 } else if (!entry) {
6800                         /*
6801                          * Ok our backrefs were broken, we'll assume this is the
6802                          * correct value and add an entry for this range.
6803                          */
6804                         entry = malloc(sizeof(struct extent_entry));
6805                         if (!entry) {
6806                                 ret = -ENOMEM;
6807                                 goto out;
6808                         }
6809                         memset(entry, 0, sizeof(*entry));
6810                         entry->bytenr = rec->start;
6811                         entry->bytes = rec->nr;
6812                         list_add_tail(&entry->list, &entries);
6813                         nr_entries++;
6814                 }
6815                 entry->count++;
6816                 best = find_most_right_entry(&entries);
6817                 if (!best) {
6818                         fprintf(stderr, "Backrefs and extent record evenly "
6819                                 "split on who is right, this is going to "
6820                                 "require user input to fix bytenr %Lu bytes "
6821                                 "%Lu\n", rec->start, rec->nr);
6822                         ret = -EINVAL;
6823                         goto out;
6824                 }
6825         }
6826
6827         /*
6828          * I don't think this can happen currently as we'll abort() if we catch
6829          * this case higher up, but in case somebody removes that we still can't
6830          * deal with it properly here yet, so just bail out of that's the case.
6831          */
6832         if (best->bytenr != rec->start) {
6833                 fprintf(stderr, "Extent start and backref starts don't match, "
6834                         "please use btrfs-image on this file system and send "
6835                         "it to a btrfs developer so they can make fsck fix "
6836                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
6837                         rec->start, rec->nr);
6838                 ret = -EINVAL;
6839                 goto out;
6840         }
6841
6842         /*
6843          * Ok great we all agreed on an extent record, let's go find the real
6844          * references and fix up the ones that don't match.
6845          */
6846         list_for_each_entry(back, &rec->backrefs, list) {
6847                 if (back->full_backref || !back->is_data)
6848                         continue;
6849
6850                 dback = (struct data_backref *)back;
6851
6852                 /*
6853                  * Still ignoring backrefs that don't have a real ref attached
6854                  * to them.
6855                  */
6856                 if (dback->found_ref == 0)
6857                         continue;
6858
6859                 if (dback->bytes == best->bytes &&
6860                     dback->disk_bytenr == best->bytenr)
6861                         continue;
6862
6863                 ret = repair_ref(info, path, dback, best);
6864                 if (ret)
6865                         goto out;
6866         }
6867
6868         /*
6869          * Ok we messed with the actual refs, which means we need to drop our
6870          * entire cache and go back and rescan.  I know this is a huge pain and
6871          * adds a lot of extra work, but it's the only way to be safe.  Once all
6872          * the backrefs agree we may not need to do anything to the extent
6873          * record itself.
6874          */
6875         ret = -EAGAIN;
6876 out:
6877         while (!list_empty(&entries)) {
6878                 entry = list_entry(entries.next, struct extent_entry, list);
6879                 list_del_init(&entry->list);
6880                 free(entry);
6881         }
6882         return ret;
6883 }
6884
6885 static int process_duplicates(struct btrfs_root *root,
6886                               struct cache_tree *extent_cache,
6887                               struct extent_record *rec)
6888 {
6889         struct extent_record *good, *tmp;
6890         struct cache_extent *cache;
6891         int ret;
6892
6893         /*
6894          * If we found a extent record for this extent then return, or if we
6895          * have more than one duplicate we are likely going to need to delete
6896          * something.
6897          */
6898         if (rec->found_rec || rec->num_duplicates > 1)
6899                 return 0;
6900
6901         /* Shouldn't happen but just in case */
6902         BUG_ON(!rec->num_duplicates);
6903
6904         /*
6905          * So this happens if we end up with a backref that doesn't match the
6906          * actual extent entry.  So either the backref is bad or the extent
6907          * entry is bad.  Either way we want to have the extent_record actually
6908          * reflect what we found in the extent_tree, so we need to take the
6909          * duplicate out and use that as the extent_record since the only way we
6910          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6911          */
6912         remove_cache_extent(extent_cache, &rec->cache);
6913
6914         good = list_entry(rec->dups.next, struct extent_record, list);
6915         list_del_init(&good->list);
6916         INIT_LIST_HEAD(&good->backrefs);
6917         INIT_LIST_HEAD(&good->dups);
6918         good->cache.start = good->start;
6919         good->cache.size = good->nr;
6920         good->content_checked = 0;
6921         good->owner_ref_checked = 0;
6922         good->num_duplicates = 0;
6923         good->refs = rec->refs;
6924         list_splice_init(&rec->backrefs, &good->backrefs);
6925         while (1) {
6926                 cache = lookup_cache_extent(extent_cache, good->start,
6927                                             good->nr);
6928                 if (!cache)
6929                         break;
6930                 tmp = container_of(cache, struct extent_record, cache);
6931
6932                 /*
6933                  * If we find another overlapping extent and it's found_rec is
6934                  * set then it's a duplicate and we need to try and delete
6935                  * something.
6936                  */
6937                 if (tmp->found_rec || tmp->num_duplicates > 0) {
6938                         if (list_empty(&good->list))
6939                                 list_add_tail(&good->list,
6940                                               &duplicate_extents);
6941                         good->num_duplicates += tmp->num_duplicates + 1;
6942                         list_splice_init(&tmp->dups, &good->dups);
6943                         list_del_init(&tmp->list);
6944                         list_add_tail(&tmp->list, &good->dups);
6945                         remove_cache_extent(extent_cache, &tmp->cache);
6946                         continue;
6947                 }
6948
6949                 /*
6950                  * Ok we have another non extent item backed extent rec, so lets
6951                  * just add it to this extent and carry on like we did above.
6952                  */
6953                 good->refs += tmp->refs;
6954                 list_splice_init(&tmp->backrefs, &good->backrefs);
6955                 remove_cache_extent(extent_cache, &tmp->cache);
6956                 free(tmp);
6957         }
6958         ret = insert_cache_extent(extent_cache, &good->cache);
6959         BUG_ON(ret);
6960         free(rec);
6961         return good->num_duplicates ? 0 : 1;
6962 }
6963
6964 static int delete_duplicate_records(struct btrfs_root *root,
6965                                     struct extent_record *rec)
6966 {
6967         struct btrfs_trans_handle *trans;
6968         LIST_HEAD(delete_list);
6969         struct btrfs_path *path;
6970         struct extent_record *tmp, *good, *n;
6971         int nr_del = 0;
6972         int ret = 0, err;
6973         struct btrfs_key key;
6974
6975         path = btrfs_alloc_path();
6976         if (!path) {
6977                 ret = -ENOMEM;
6978                 goto out;
6979         }
6980
6981         good = rec;
6982         /* Find the record that covers all of the duplicates. */
6983         list_for_each_entry(tmp, &rec->dups, list) {
6984                 if (good->start < tmp->start)
6985                         continue;
6986                 if (good->nr > tmp->nr)
6987                         continue;
6988
6989                 if (tmp->start + tmp->nr < good->start + good->nr) {
6990                         fprintf(stderr, "Ok we have overlapping extents that "
6991                                 "aren't completely covered by eachother, this "
6992                                 "is going to require more careful thought.  "
6993                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
6994                                 tmp->start, tmp->nr, good->start, good->nr);
6995                         abort();
6996                 }
6997                 good = tmp;
6998         }
6999
7000         if (good != rec)
7001                 list_add_tail(&rec->list, &delete_list);
7002
7003         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7004                 if (tmp == good)
7005                         continue;
7006                 list_move_tail(&tmp->list, &delete_list);
7007         }
7008
7009         root = root->fs_info->extent_root;
7010         trans = btrfs_start_transaction(root, 1);
7011         if (IS_ERR(trans)) {
7012                 ret = PTR_ERR(trans);
7013                 goto out;
7014         }
7015
7016         list_for_each_entry(tmp, &delete_list, list) {
7017                 if (tmp->found_rec == 0)
7018                         continue;
7019                 key.objectid = tmp->start;
7020                 key.type = BTRFS_EXTENT_ITEM_KEY;
7021                 key.offset = tmp->nr;
7022
7023                 /* Shouldn't happen but just in case */
7024                 if (tmp->metadata) {
7025                         fprintf(stderr, "Well this shouldn't happen, extent "
7026                                 "record overlaps but is metadata? "
7027                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7028                         abort();
7029                 }
7030
7031                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7032                 if (ret) {
7033                         if (ret > 0)
7034                                 ret = -EINVAL;
7035                         break;
7036                 }
7037                 ret = btrfs_del_item(trans, root, path);
7038                 if (ret)
7039                         break;
7040                 btrfs_release_path(path);
7041                 nr_del++;
7042         }
7043         err = btrfs_commit_transaction(trans, root);
7044         if (err && !ret)
7045                 ret = err;
7046 out:
7047         while (!list_empty(&delete_list)) {
7048                 tmp = list_entry(delete_list.next, struct extent_record, list);
7049                 list_del_init(&tmp->list);
7050                 if (tmp == rec)
7051                         continue;
7052                 free(tmp);
7053         }
7054
7055         while (!list_empty(&rec->dups)) {
7056                 tmp = list_entry(rec->dups.next, struct extent_record, list);
7057                 list_del_init(&tmp->list);
7058                 free(tmp);
7059         }
7060
7061         btrfs_free_path(path);
7062
7063         if (!ret && !nr_del)
7064                 rec->num_duplicates = 0;
7065
7066         return ret ? ret : nr_del;
7067 }
7068
7069 static int find_possible_backrefs(struct btrfs_fs_info *info,
7070                                   struct btrfs_path *path,
7071                                   struct cache_tree *extent_cache,
7072                                   struct extent_record *rec)
7073 {
7074         struct btrfs_root *root;
7075         struct extent_backref *back;
7076         struct data_backref *dback;
7077         struct cache_extent *cache;
7078         struct btrfs_file_extent_item *fi;
7079         struct btrfs_key key;
7080         u64 bytenr, bytes;
7081         int ret;
7082
7083         list_for_each_entry(back, &rec->backrefs, list) {
7084                 /* Don't care about full backrefs (poor unloved backrefs) */
7085                 if (back->full_backref || !back->is_data)
7086                         continue;
7087
7088                 dback = (struct data_backref *)back;
7089
7090                 /* We found this one, we don't need to do a lookup */
7091                 if (dback->found_ref)
7092                         continue;
7093
7094                 key.objectid = dback->root;
7095                 key.type = BTRFS_ROOT_ITEM_KEY;
7096                 key.offset = (u64)-1;
7097
7098                 root = btrfs_read_fs_root(info, &key);
7099
7100                 /* No root, definitely a bad ref, skip */
7101                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7102                         continue;
7103                 /* Other err, exit */
7104                 if (IS_ERR(root))
7105                         return PTR_ERR(root);
7106
7107                 key.objectid = dback->owner;
7108                 key.type = BTRFS_EXTENT_DATA_KEY;
7109                 key.offset = dback->offset;
7110                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7111                 if (ret) {
7112                         btrfs_release_path(path);
7113                         if (ret < 0)
7114                                 return ret;
7115                         /* Didn't find it, we can carry on */
7116                         ret = 0;
7117                         continue;
7118                 }
7119
7120                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7121                                     struct btrfs_file_extent_item);
7122                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7123                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7124                 btrfs_release_path(path);
7125                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7126                 if (cache) {
7127                         struct extent_record *tmp;
7128                         tmp = container_of(cache, struct extent_record, cache);
7129
7130                         /*
7131                          * If we found an extent record for the bytenr for this
7132                          * particular backref then we can't add it to our
7133                          * current extent record.  We only want to add backrefs
7134                          * that don't have a corresponding extent item in the
7135                          * extent tree since they likely belong to this record
7136                          * and we need to fix it if it doesn't match bytenrs.
7137                          */
7138                         if  (tmp->found_rec)
7139                                 continue;
7140                 }
7141
7142                 dback->found_ref += 1;
7143                 dback->disk_bytenr = bytenr;
7144                 dback->bytes = bytes;
7145
7146                 /*
7147                  * Set this so the verify backref code knows not to trust the
7148                  * values in this backref.
7149                  */
7150                 back->broken = 1;
7151         }
7152
7153         return 0;
7154 }
7155
7156 /*
7157  * Record orphan data ref into corresponding root.
7158  *
7159  * Return 0 if the extent item contains data ref and recorded.
7160  * Return 1 if the extent item contains no useful data ref
7161  *   On that case, it may contains only shared_dataref or metadata backref
7162  *   or the file extent exists(this should be handled by the extent bytenr
7163  *   recovery routine)
7164  * Return <0 if something goes wrong.
7165  */
7166 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7167                                       struct extent_record *rec)
7168 {
7169         struct btrfs_key key;
7170         struct btrfs_root *dest_root;
7171         struct extent_backref *back;
7172         struct data_backref *dback;
7173         struct orphan_data_extent *orphan;
7174         struct btrfs_path *path;
7175         int recorded_data_ref = 0;
7176         int ret = 0;
7177
7178         if (rec->metadata)
7179                 return 1;
7180         path = btrfs_alloc_path();
7181         if (!path)
7182                 return -ENOMEM;
7183         list_for_each_entry(back, &rec->backrefs, list) {
7184                 if (back->full_backref || !back->is_data ||
7185                     !back->found_extent_tree)
7186                         continue;
7187                 dback = (struct data_backref *)back;
7188                 if (dback->found_ref)
7189                         continue;
7190                 key.objectid = dback->root;
7191                 key.type = BTRFS_ROOT_ITEM_KEY;
7192                 key.offset = (u64)-1;
7193
7194                 dest_root = btrfs_read_fs_root(fs_info, &key);
7195
7196                 /* For non-exist root we just skip it */
7197                 if (IS_ERR(dest_root) || !dest_root)
7198                         continue;
7199
7200                 key.objectid = dback->owner;
7201                 key.type = BTRFS_EXTENT_DATA_KEY;
7202                 key.offset = dback->offset;
7203
7204                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7205                 /*
7206                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7207                  * we need to record it for inode/file extent rebuild.
7208                  * For ret > 0, we record it only for file extent rebuild.
7209                  * For ret == 0, the file extent exists but only bytenr
7210                  * mismatch, let the original bytenr fix routine to handle,
7211                  * don't record it.
7212                  */
7213                 if (ret == 0)
7214                         continue;
7215                 ret = 0;
7216                 orphan = malloc(sizeof(*orphan));
7217                 if (!orphan) {
7218                         ret = -ENOMEM;
7219                         goto out;
7220                 }
7221                 INIT_LIST_HEAD(&orphan->list);
7222                 orphan->root = dback->root;
7223                 orphan->objectid = dback->owner;
7224                 orphan->offset = dback->offset;
7225                 orphan->disk_bytenr = rec->cache.start;
7226                 orphan->disk_len = rec->cache.size;
7227                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7228                 recorded_data_ref = 1;
7229         }
7230 out:
7231         btrfs_free_path(path);
7232         if (!ret)
7233                 return !recorded_data_ref;
7234         else
7235                 return ret;
7236 }
7237
7238 /*
7239  * when an incorrect extent item is found, this will delete
7240  * all of the existing entries for it and recreate them
7241  * based on what the tree scan found.
7242  */
7243 static int fixup_extent_refs(struct btrfs_fs_info *info,
7244                              struct cache_tree *extent_cache,
7245                              struct extent_record *rec)
7246 {
7247         struct btrfs_trans_handle *trans = NULL;
7248         int ret;
7249         struct btrfs_path *path;
7250         struct list_head *cur = rec->backrefs.next;
7251         struct cache_extent *cache;
7252         struct extent_backref *back;
7253         int allocated = 0;
7254         u64 flags = 0;
7255
7256         if (rec->flag_block_full_backref)
7257                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7258
7259         path = btrfs_alloc_path();
7260         if (!path)
7261                 return -ENOMEM;
7262
7263         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7264                 /*
7265                  * Sometimes the backrefs themselves are so broken they don't
7266                  * get attached to any meaningful rec, so first go back and
7267                  * check any of our backrefs that we couldn't find and throw
7268                  * them into the list if we find the backref so that
7269                  * verify_backrefs can figure out what to do.
7270                  */
7271                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7272                 if (ret < 0)
7273                         goto out;
7274         }
7275
7276         /* step one, make sure all of the backrefs agree */
7277         ret = verify_backrefs(info, path, rec);
7278         if (ret < 0)
7279                 goto out;
7280
7281         trans = btrfs_start_transaction(info->extent_root, 1);
7282         if (IS_ERR(trans)) {
7283                 ret = PTR_ERR(trans);
7284                 goto out;
7285         }
7286
7287         /* step two, delete all the existing records */
7288         ret = delete_extent_records(trans, info->extent_root, path,
7289                                     rec->start, rec->max_size);
7290
7291         if (ret < 0)
7292                 goto out;
7293
7294         /* was this block corrupt?  If so, don't add references to it */
7295         cache = lookup_cache_extent(info->corrupt_blocks,
7296                                     rec->start, rec->max_size);
7297         if (cache) {
7298                 ret = 0;
7299                 goto out;
7300         }
7301
7302         /* step three, recreate all the refs we did find */
7303         while(cur != &rec->backrefs) {
7304                 back = list_entry(cur, struct extent_backref, list);
7305                 cur = cur->next;
7306
7307                 /*
7308                  * if we didn't find any references, don't create a
7309                  * new extent record
7310                  */
7311                 if (!back->found_ref)
7312                         continue;
7313
7314                 rec->bad_full_backref = 0;
7315                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7316                 allocated = 1;
7317
7318                 if (ret)
7319                         goto out;
7320         }
7321 out:
7322         if (trans) {
7323                 int err = btrfs_commit_transaction(trans, info->extent_root);
7324                 if (!ret)
7325                         ret = err;
7326         }
7327
7328         btrfs_free_path(path);
7329         return ret;
7330 }
7331
7332 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7333                               struct extent_record *rec)
7334 {
7335         struct btrfs_trans_handle *trans;
7336         struct btrfs_root *root = fs_info->extent_root;
7337         struct btrfs_path *path;
7338         struct btrfs_extent_item *ei;
7339         struct btrfs_key key;
7340         u64 flags;
7341         int ret = 0;
7342
7343         key.objectid = rec->start;
7344         if (rec->metadata) {
7345                 key.type = BTRFS_METADATA_ITEM_KEY;
7346                 key.offset = rec->info_level;
7347         } else {
7348                 key.type = BTRFS_EXTENT_ITEM_KEY;
7349                 key.offset = rec->max_size;
7350         }
7351
7352         path = btrfs_alloc_path();
7353         if (!path)
7354                 return -ENOMEM;
7355
7356         trans = btrfs_start_transaction(root, 0);
7357         if (IS_ERR(trans)) {
7358                 btrfs_free_path(path);
7359                 return PTR_ERR(trans);
7360         }
7361
7362         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7363         if (ret < 0) {
7364                 btrfs_free_path(path);
7365                 btrfs_commit_transaction(trans, root);
7366                 return ret;
7367         } else if (ret) {
7368                 fprintf(stderr, "Didn't find extent for %llu\n",
7369                         (unsigned long long)rec->start);
7370                 btrfs_free_path(path);
7371                 btrfs_commit_transaction(trans, root);
7372                 return -ENOENT;
7373         }
7374
7375         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7376                             struct btrfs_extent_item);
7377         flags = btrfs_extent_flags(path->nodes[0], ei);
7378         if (rec->flag_block_full_backref) {
7379                 fprintf(stderr, "setting full backref on %llu\n",
7380                         (unsigned long long)key.objectid);
7381                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7382         } else {
7383                 fprintf(stderr, "clearing full backref on %llu\n",
7384                         (unsigned long long)key.objectid);
7385                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7386         }
7387         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7388         btrfs_mark_buffer_dirty(path->nodes[0]);
7389         btrfs_free_path(path);
7390         return btrfs_commit_transaction(trans, root);
7391 }
7392
7393 /* right now we only prune from the extent allocation tree */
7394 static int prune_one_block(struct btrfs_trans_handle *trans,
7395                            struct btrfs_fs_info *info,
7396                            struct btrfs_corrupt_block *corrupt)
7397 {
7398         int ret;
7399         struct btrfs_path path;
7400         struct extent_buffer *eb;
7401         u64 found;
7402         int slot;
7403         int nritems;
7404         int level = corrupt->level + 1;
7405
7406         btrfs_init_path(&path);
7407 again:
7408         /* we want to stop at the parent to our busted block */
7409         path.lowest_level = level;
7410
7411         ret = btrfs_search_slot(trans, info->extent_root,
7412                                 &corrupt->key, &path, -1, 1);
7413
7414         if (ret < 0)
7415                 goto out;
7416
7417         eb = path.nodes[level];
7418         if (!eb) {
7419                 ret = -ENOENT;
7420                 goto out;
7421         }
7422
7423         /*
7424          * hopefully the search gave us the block we want to prune,
7425          * lets try that first
7426          */
7427         slot = path.slots[level];
7428         found =  btrfs_node_blockptr(eb, slot);
7429         if (found == corrupt->cache.start)
7430                 goto del_ptr;
7431
7432         nritems = btrfs_header_nritems(eb);
7433
7434         /* the search failed, lets scan this node and hope we find it */
7435         for (slot = 0; slot < nritems; slot++) {
7436                 found =  btrfs_node_blockptr(eb, slot);
7437                 if (found == corrupt->cache.start)
7438                         goto del_ptr;
7439         }
7440         /*
7441          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7442          * to this block
7443          */
7444         if (eb == info->extent_root->node) {
7445                 ret = -ENOENT;
7446                 goto out;
7447         } else {
7448                 level++;
7449                 btrfs_release_path(&path);
7450                 goto again;
7451         }
7452
7453 del_ptr:
7454         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7455         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7456
7457 out:
7458         btrfs_release_path(&path);
7459         return ret;
7460 }
7461
7462 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7463 {
7464         struct btrfs_trans_handle *trans = NULL;
7465         struct cache_extent *cache;
7466         struct btrfs_corrupt_block *corrupt;
7467
7468         while (1) {
7469                 cache = search_cache_extent(info->corrupt_blocks, 0);
7470                 if (!cache)
7471                         break;
7472                 if (!trans) {
7473                         trans = btrfs_start_transaction(info->extent_root, 1);
7474                         if (IS_ERR(trans))
7475                                 return PTR_ERR(trans);
7476                 }
7477                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7478                 prune_one_block(trans, info, corrupt);
7479                 remove_cache_extent(info->corrupt_blocks, cache);
7480         }
7481         if (trans)
7482                 return btrfs_commit_transaction(trans, info->extent_root);
7483         return 0;
7484 }
7485
7486 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7487 {
7488         struct btrfs_block_group_cache *cache;
7489         u64 start, end;
7490         int ret;
7491
7492         while (1) {
7493                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7494                                             &start, &end, EXTENT_DIRTY);
7495                 if (ret)
7496                         break;
7497                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7498                                    GFP_NOFS);
7499         }
7500
7501         start = 0;
7502         while (1) {
7503                 cache = btrfs_lookup_first_block_group(fs_info, start);
7504                 if (!cache)
7505                         break;
7506                 if (cache->cached)
7507                         cache->cached = 0;
7508                 start = cache->key.objectid + cache->key.offset;
7509         }
7510 }
7511
7512 static int check_extent_refs(struct btrfs_root *root,
7513                              struct cache_tree *extent_cache)
7514 {
7515         struct extent_record *rec;
7516         struct cache_extent *cache;
7517         int err = 0;
7518         int ret = 0;
7519         int fixed = 0;
7520         int had_dups = 0;
7521         int recorded = 0;
7522
7523         if (repair) {
7524                 /*
7525                  * if we're doing a repair, we have to make sure
7526                  * we don't allocate from the problem extents.
7527                  * In the worst case, this will be all the
7528                  * extents in the FS
7529                  */
7530                 cache = search_cache_extent(extent_cache, 0);
7531                 while(cache) {
7532                         rec = container_of(cache, struct extent_record, cache);
7533                         set_extent_dirty(root->fs_info->excluded_extents,
7534                                          rec->start,
7535                                          rec->start + rec->max_size - 1,
7536                                          GFP_NOFS);
7537                         cache = next_cache_extent(cache);
7538                 }
7539
7540                 /* pin down all the corrupted blocks too */
7541                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7542                 while(cache) {
7543                         set_extent_dirty(root->fs_info->excluded_extents,
7544                                          cache->start,
7545                                          cache->start + cache->size - 1,
7546                                          GFP_NOFS);
7547                         cache = next_cache_extent(cache);
7548                 }
7549                 prune_corrupt_blocks(root->fs_info);
7550                 reset_cached_block_groups(root->fs_info);
7551         }
7552
7553         reset_cached_block_groups(root->fs_info);
7554
7555         /*
7556          * We need to delete any duplicate entries we find first otherwise we
7557          * could mess up the extent tree when we have backrefs that actually
7558          * belong to a different extent item and not the weird duplicate one.
7559          */
7560         while (repair && !list_empty(&duplicate_extents)) {
7561                 rec = list_entry(duplicate_extents.next, struct extent_record,
7562                                  list);
7563                 list_del_init(&rec->list);
7564
7565                 /* Sometimes we can find a backref before we find an actual
7566                  * extent, so we need to process it a little bit to see if there
7567                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7568                  * if this is a backref screwup.  If we need to delete stuff
7569                  * process_duplicates() will return 0, otherwise it will return
7570                  * 1 and we
7571                  */
7572                 if (process_duplicates(root, extent_cache, rec))
7573                         continue;
7574                 ret = delete_duplicate_records(root, rec);
7575                 if (ret < 0)
7576                         return ret;
7577                 /*
7578                  * delete_duplicate_records will return the number of entries
7579                  * deleted, so if it's greater than 0 then we know we actually
7580                  * did something and we need to remove.
7581                  */
7582                 if (ret)
7583                         had_dups = 1;
7584         }
7585
7586         if (had_dups)
7587                 return -EAGAIN;
7588
7589         while(1) {
7590                 int cur_err = 0;
7591
7592                 fixed = 0;
7593                 recorded = 0;
7594                 cache = search_cache_extent(extent_cache, 0);
7595                 if (!cache)
7596                         break;
7597                 rec = container_of(cache, struct extent_record, cache);
7598                 if (rec->num_duplicates) {
7599                         fprintf(stderr, "extent item %llu has multiple extent "
7600                                 "items\n", (unsigned long long)rec->start);
7601                         err = 1;
7602                         cur_err = 1;
7603                 }
7604
7605                 if (rec->refs != rec->extent_item_refs) {
7606                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7607                                 (unsigned long long)rec->start,
7608                                 (unsigned long long)rec->nr);
7609                         fprintf(stderr, "extent item %llu, found %llu\n",
7610                                 (unsigned long long)rec->extent_item_refs,
7611                                 (unsigned long long)rec->refs);
7612                         ret = record_orphan_data_extents(root->fs_info, rec);
7613                         if (ret < 0)
7614                                 goto repair_abort;
7615                         if (ret == 0) {
7616                                 recorded = 1;
7617                         } else {
7618                                 /*
7619                                  * we can't use the extent to repair file
7620                                  * extent, let the fallback method handle it.
7621                                  */
7622                                 if (!fixed && repair) {
7623                                         ret = fixup_extent_refs(
7624                                                         root->fs_info,
7625                                                         extent_cache, rec);
7626                                         if (ret)
7627                                                 goto repair_abort;
7628                                         fixed = 1;
7629                                 }
7630                         }
7631                         err = 1;
7632                         cur_err = 1;
7633                 }
7634                 if (all_backpointers_checked(rec, 1)) {
7635                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7636                                 (unsigned long long)rec->start,
7637                                 (unsigned long long)rec->nr);
7638
7639                         if (!fixed && !recorded && repair) {
7640                                 ret = fixup_extent_refs(root->fs_info,
7641                                                         extent_cache, rec);
7642                                 if (ret)
7643                                         goto repair_abort;
7644                                 fixed = 1;
7645                         }
7646                         cur_err = 1;
7647                         err = 1;
7648                 }
7649                 if (!rec->owner_ref_checked) {
7650                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7651                                 (unsigned long long)rec->start,
7652                                 (unsigned long long)rec->nr);
7653                         if (!fixed && !recorded && repair) {
7654                                 ret = fixup_extent_refs(root->fs_info,
7655                                                         extent_cache, rec);
7656                                 if (ret)
7657                                         goto repair_abort;
7658                                 fixed = 1;
7659                         }
7660                         err = 1;
7661                         cur_err = 1;
7662                 }
7663                 if (rec->bad_full_backref) {
7664                         fprintf(stderr, "bad full backref, on [%llu]\n",
7665                                 (unsigned long long)rec->start);
7666                         if (repair) {
7667                                 ret = fixup_extent_flags(root->fs_info, rec);
7668                                 if (ret)
7669                                         goto repair_abort;
7670                                 fixed = 1;
7671                         }
7672                         err = 1;
7673                         cur_err = 1;
7674                 }
7675                 /*
7676                  * Although it's not a extent ref's problem, we reuse this
7677                  * routine for error reporting.
7678                  * No repair function yet.
7679                  */
7680                 if (rec->crossing_stripes) {
7681                         fprintf(stderr,
7682                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7683                                 rec->start, rec->start + rec->max_size);
7684                         err = 1;
7685                         cur_err = 1;
7686                 }
7687
7688                 if (rec->wrong_chunk_type) {
7689                         fprintf(stderr,
7690                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7691                                 rec->start, rec->start + rec->max_size);
7692                         err = 1;
7693                         cur_err = 1;
7694                 }
7695
7696                 remove_cache_extent(extent_cache, cache);
7697                 free_all_extent_backrefs(rec);
7698                 if (!init_extent_tree && repair && (!cur_err || fixed))
7699                         clear_extent_dirty(root->fs_info->excluded_extents,
7700                                            rec->start,
7701                                            rec->start + rec->max_size - 1,
7702                                            GFP_NOFS);
7703                 free(rec);
7704         }
7705 repair_abort:
7706         if (repair) {
7707                 if (ret && ret != -EAGAIN) {
7708                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7709                         exit(1);
7710                 } else if (!ret) {
7711                         struct btrfs_trans_handle *trans;
7712
7713                         root = root->fs_info->extent_root;
7714                         trans = btrfs_start_transaction(root, 1);
7715                         if (IS_ERR(trans)) {
7716                                 ret = PTR_ERR(trans);
7717                                 goto repair_abort;
7718                         }
7719
7720                         btrfs_fix_block_accounting(trans, root);
7721                         ret = btrfs_commit_transaction(trans, root);
7722                         if (ret)
7723                                 goto repair_abort;
7724                 }
7725                 if (err)
7726                         fprintf(stderr, "repaired damaged extent references\n");
7727                 return ret;
7728         }
7729         return err;
7730 }
7731
7732 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7733 {
7734         u64 stripe_size;
7735
7736         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7737                 stripe_size = length;
7738                 stripe_size /= num_stripes;
7739         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7740                 stripe_size = length * 2;
7741                 stripe_size /= num_stripes;
7742         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7743                 stripe_size = length;
7744                 stripe_size /= (num_stripes - 1);
7745         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7746                 stripe_size = length;
7747                 stripe_size /= (num_stripes - 2);
7748         } else {
7749                 stripe_size = length;
7750         }
7751         return stripe_size;
7752 }
7753
7754 /*
7755  * Check the chunk with its block group/dev list ref:
7756  * Return 0 if all refs seems valid.
7757  * Return 1 if part of refs seems valid, need later check for rebuild ref
7758  * like missing block group and needs to search extent tree to rebuild them.
7759  * Return -1 if essential refs are missing and unable to rebuild.
7760  */
7761 static int check_chunk_refs(struct chunk_record *chunk_rec,
7762                             struct block_group_tree *block_group_cache,
7763                             struct device_extent_tree *dev_extent_cache,
7764                             int silent)
7765 {
7766         struct cache_extent *block_group_item;
7767         struct block_group_record *block_group_rec;
7768         struct cache_extent *dev_extent_item;
7769         struct device_extent_record *dev_extent_rec;
7770         u64 devid;
7771         u64 offset;
7772         u64 length;
7773         int metadump_v2 = 0;
7774         int i;
7775         int ret = 0;
7776
7777         block_group_item = lookup_cache_extent(&block_group_cache->tree,
7778                                                chunk_rec->offset,
7779                                                chunk_rec->length);
7780         if (block_group_item) {
7781                 block_group_rec = container_of(block_group_item,
7782                                                struct block_group_record,
7783                                                cache);
7784                 if (chunk_rec->length != block_group_rec->offset ||
7785                     chunk_rec->offset != block_group_rec->objectid ||
7786                     (!metadump_v2 &&
7787                      chunk_rec->type_flags != block_group_rec->flags)) {
7788                         if (!silent)
7789                                 fprintf(stderr,
7790                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7791                                         chunk_rec->objectid,
7792                                         chunk_rec->type,
7793                                         chunk_rec->offset,
7794                                         chunk_rec->length,
7795                                         chunk_rec->offset,
7796                                         chunk_rec->type_flags,
7797                                         block_group_rec->objectid,
7798                                         block_group_rec->type,
7799                                         block_group_rec->offset,
7800                                         block_group_rec->offset,
7801                                         block_group_rec->objectid,
7802                                         block_group_rec->flags);
7803                         ret = -1;
7804                 } else {
7805                         list_del_init(&block_group_rec->list);
7806                         chunk_rec->bg_rec = block_group_rec;
7807                 }
7808         } else {
7809                 if (!silent)
7810                         fprintf(stderr,
7811                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7812                                 chunk_rec->objectid,
7813                                 chunk_rec->type,
7814                                 chunk_rec->offset,
7815                                 chunk_rec->length,
7816                                 chunk_rec->offset,
7817                                 chunk_rec->type_flags);
7818                 ret = 1;
7819         }
7820
7821         if (metadump_v2)
7822                 return ret;
7823
7824         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7825                                     chunk_rec->num_stripes);
7826         for (i = 0; i < chunk_rec->num_stripes; ++i) {
7827                 devid = chunk_rec->stripes[i].devid;
7828                 offset = chunk_rec->stripes[i].offset;
7829                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7830                                                        devid, offset, length);
7831                 if (dev_extent_item) {
7832                         dev_extent_rec = container_of(dev_extent_item,
7833                                                 struct device_extent_record,
7834                                                 cache);
7835                         if (dev_extent_rec->objectid != devid ||
7836                             dev_extent_rec->offset != offset ||
7837                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
7838                             dev_extent_rec->length != length) {
7839                                 if (!silent)
7840                                         fprintf(stderr,
7841                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7842                                                 chunk_rec->objectid,
7843                                                 chunk_rec->type,
7844                                                 chunk_rec->offset,
7845                                                 chunk_rec->stripes[i].devid,
7846                                                 chunk_rec->stripes[i].offset,
7847                                                 dev_extent_rec->objectid,
7848                                                 dev_extent_rec->offset,
7849                                                 dev_extent_rec->length);
7850                                 ret = -1;
7851                         } else {
7852                                 list_move(&dev_extent_rec->chunk_list,
7853                                           &chunk_rec->dextents);
7854                         }
7855                 } else {
7856                         if (!silent)
7857                                 fprintf(stderr,
7858                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7859                                         chunk_rec->objectid,
7860                                         chunk_rec->type,
7861                                         chunk_rec->offset,
7862                                         chunk_rec->stripes[i].devid,
7863                                         chunk_rec->stripes[i].offset);
7864                         ret = -1;
7865                 }
7866         }
7867         return ret;
7868 }
7869
7870 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7871 int check_chunks(struct cache_tree *chunk_cache,
7872                  struct block_group_tree *block_group_cache,
7873                  struct device_extent_tree *dev_extent_cache,
7874                  struct list_head *good, struct list_head *bad,
7875                  struct list_head *rebuild, int silent)
7876 {
7877         struct cache_extent *chunk_item;
7878         struct chunk_record *chunk_rec;
7879         struct block_group_record *bg_rec;
7880         struct device_extent_record *dext_rec;
7881         int err;
7882         int ret = 0;
7883
7884         chunk_item = first_cache_extent(chunk_cache);
7885         while (chunk_item) {
7886                 chunk_rec = container_of(chunk_item, struct chunk_record,
7887                                          cache);
7888                 err = check_chunk_refs(chunk_rec, block_group_cache,
7889                                        dev_extent_cache, silent);
7890                 if (err < 0)
7891                         ret = err;
7892                 if (err == 0 && good)
7893                         list_add_tail(&chunk_rec->list, good);
7894                 if (err > 0 && rebuild)
7895                         list_add_tail(&chunk_rec->list, rebuild);
7896                 if (err < 0 && bad)
7897                         list_add_tail(&chunk_rec->list, bad);
7898                 chunk_item = next_cache_extent(chunk_item);
7899         }
7900
7901         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7902                 if (!silent)
7903                         fprintf(stderr,
7904                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7905                                 bg_rec->objectid,
7906                                 bg_rec->offset,
7907                                 bg_rec->flags);
7908                 if (!ret)
7909                         ret = 1;
7910         }
7911
7912         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7913                             chunk_list) {
7914                 if (!silent)
7915                         fprintf(stderr,
7916                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7917                                 dext_rec->objectid,
7918                                 dext_rec->offset,
7919                                 dext_rec->length);
7920                 if (!ret)
7921                         ret = 1;
7922         }
7923         return ret;
7924 }
7925
7926
7927 static int check_device_used(struct device_record *dev_rec,
7928                              struct device_extent_tree *dext_cache)
7929 {
7930         struct cache_extent *cache;
7931         struct device_extent_record *dev_extent_rec;
7932         u64 total_byte = 0;
7933
7934         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7935         while (cache) {
7936                 dev_extent_rec = container_of(cache,
7937                                               struct device_extent_record,
7938                                               cache);
7939                 if (dev_extent_rec->objectid != dev_rec->devid)
7940                         break;
7941
7942                 list_del_init(&dev_extent_rec->device_list);
7943                 total_byte += dev_extent_rec->length;
7944                 cache = next_cache_extent(cache);
7945         }
7946
7947         if (total_byte != dev_rec->byte_used) {
7948                 fprintf(stderr,
7949                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7950                         total_byte, dev_rec->byte_used, dev_rec->objectid,
7951                         dev_rec->type, dev_rec->offset);
7952                 return -1;
7953         } else {
7954                 return 0;
7955         }
7956 }
7957
7958 /* check btrfs_dev_item -> btrfs_dev_extent */
7959 static int check_devices(struct rb_root *dev_cache,
7960                          struct device_extent_tree *dev_extent_cache)
7961 {
7962         struct rb_node *dev_node;
7963         struct device_record *dev_rec;
7964         struct device_extent_record *dext_rec;
7965         int err;
7966         int ret = 0;
7967
7968         dev_node = rb_first(dev_cache);
7969         while (dev_node) {
7970                 dev_rec = container_of(dev_node, struct device_record, node);
7971                 err = check_device_used(dev_rec, dev_extent_cache);
7972                 if (err)
7973                         ret = err;
7974
7975                 dev_node = rb_next(dev_node);
7976         }
7977         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7978                             device_list) {
7979                 fprintf(stderr,
7980                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7981                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
7982                 if (!ret)
7983                         ret = 1;
7984         }
7985         return ret;
7986 }
7987
7988 static int add_root_item_to_list(struct list_head *head,
7989                                   u64 objectid, u64 bytenr, u64 last_snapshot,
7990                                   u8 level, u8 drop_level,
7991                                   int level_size, struct btrfs_key *drop_key)
7992 {
7993
7994         struct root_item_record *ri_rec;
7995         ri_rec = malloc(sizeof(*ri_rec));
7996         if (!ri_rec)
7997                 return -ENOMEM;
7998         ri_rec->bytenr = bytenr;
7999         ri_rec->objectid = objectid;
8000         ri_rec->level = level;
8001         ri_rec->level_size = level_size;
8002         ri_rec->drop_level = drop_level;
8003         ri_rec->last_snapshot = last_snapshot;
8004         if (drop_key)
8005                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8006         list_add_tail(&ri_rec->list, head);
8007
8008         return 0;
8009 }
8010
8011 static void free_root_item_list(struct list_head *list)
8012 {
8013         struct root_item_record *ri_rec;
8014
8015         while (!list_empty(list)) {
8016                 ri_rec = list_first_entry(list, struct root_item_record,
8017                                           list);
8018                 list_del_init(&ri_rec->list);
8019                 free(ri_rec);
8020         }
8021 }
8022
8023 static int deal_root_from_list(struct list_head *list,
8024                                struct btrfs_root *root,
8025                                struct block_info *bits,
8026                                int bits_nr,
8027                                struct cache_tree *pending,
8028                                struct cache_tree *seen,
8029                                struct cache_tree *reada,
8030                                struct cache_tree *nodes,
8031                                struct cache_tree *extent_cache,
8032                                struct cache_tree *chunk_cache,
8033                                struct rb_root *dev_cache,
8034                                struct block_group_tree *block_group_cache,
8035                                struct device_extent_tree *dev_extent_cache)
8036 {
8037         int ret = 0;
8038         u64 last;
8039
8040         while (!list_empty(list)) {
8041                 struct root_item_record *rec;
8042                 struct extent_buffer *buf;
8043                 rec = list_entry(list->next,
8044                                  struct root_item_record, list);
8045                 last = 0;
8046                 buf = read_tree_block(root->fs_info->tree_root,
8047                                       rec->bytenr, rec->level_size, 0);
8048                 if (!extent_buffer_uptodate(buf)) {
8049                         free_extent_buffer(buf);
8050                         ret = -EIO;
8051                         break;
8052                 }
8053                 add_root_to_pending(buf, extent_cache, pending,
8054                                     seen, nodes, rec->objectid);
8055                 /*
8056                  * To rebuild extent tree, we need deal with snapshot
8057                  * one by one, otherwise we deal with node firstly which
8058                  * can maximize readahead.
8059                  */
8060                 while (1) {
8061                         ret = run_next_block(root, bits, bits_nr, &last,
8062                                              pending, seen, reada, nodes,
8063                                              extent_cache, chunk_cache,
8064                                              dev_cache, block_group_cache,
8065                                              dev_extent_cache, rec);
8066                         if (ret != 0)
8067                                 break;
8068                 }
8069                 free_extent_buffer(buf);
8070                 list_del(&rec->list);
8071                 free(rec);
8072                 if (ret < 0)
8073                         break;
8074         }
8075         while (ret >= 0) {
8076                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8077                                      reada, nodes, extent_cache, chunk_cache,
8078                                      dev_cache, block_group_cache,
8079                                      dev_extent_cache, NULL);
8080                 if (ret != 0) {
8081                         if (ret > 0)
8082                                 ret = 0;
8083                         break;
8084                 }
8085         }
8086         return ret;
8087 }
8088
8089 static int check_chunks_and_extents(struct btrfs_root *root)
8090 {
8091         struct rb_root dev_cache;
8092         struct cache_tree chunk_cache;
8093         struct block_group_tree block_group_cache;
8094         struct device_extent_tree dev_extent_cache;
8095         struct cache_tree extent_cache;
8096         struct cache_tree seen;
8097         struct cache_tree pending;
8098         struct cache_tree reada;
8099         struct cache_tree nodes;
8100         struct extent_io_tree excluded_extents;
8101         struct cache_tree corrupt_blocks;
8102         struct btrfs_path path;
8103         struct btrfs_key key;
8104         struct btrfs_key found_key;
8105         int ret, err = 0;
8106         struct block_info *bits;
8107         int bits_nr;
8108         struct extent_buffer *leaf;
8109         int slot;
8110         struct btrfs_root_item ri;
8111         struct list_head dropping_trees;
8112         struct list_head normal_trees;
8113         struct btrfs_root *root1;
8114         u64 objectid;
8115         u32 level_size;
8116         u8 level;
8117
8118         dev_cache = RB_ROOT;
8119         cache_tree_init(&chunk_cache);
8120         block_group_tree_init(&block_group_cache);
8121         device_extent_tree_init(&dev_extent_cache);
8122
8123         cache_tree_init(&extent_cache);
8124         cache_tree_init(&seen);
8125         cache_tree_init(&pending);
8126         cache_tree_init(&nodes);
8127         cache_tree_init(&reada);
8128         cache_tree_init(&corrupt_blocks);
8129         extent_io_tree_init(&excluded_extents);
8130         INIT_LIST_HEAD(&dropping_trees);
8131         INIT_LIST_HEAD(&normal_trees);
8132
8133         if (repair) {
8134                 root->fs_info->excluded_extents = &excluded_extents;
8135                 root->fs_info->fsck_extent_cache = &extent_cache;
8136                 root->fs_info->free_extent_hook = free_extent_hook;
8137                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8138         }
8139
8140         bits_nr = 1024;
8141         bits = malloc(bits_nr * sizeof(struct block_info));
8142         if (!bits) {
8143                 perror("malloc");
8144                 exit(1);
8145         }
8146
8147         if (ctx.progress_enabled) {
8148                 ctx.tp = TASK_EXTENTS;
8149                 task_start(ctx.info);
8150         }
8151
8152 again:
8153         root1 = root->fs_info->tree_root;
8154         level = btrfs_header_level(root1->node);
8155         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8156                                     root1->node->start, 0, level, 0,
8157                                     btrfs_level_size(root1, level), NULL);
8158         if (ret < 0)
8159                 goto out;
8160         root1 = root->fs_info->chunk_root;
8161         level = btrfs_header_level(root1->node);
8162         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8163                                     root1->node->start, 0, level, 0,
8164                                     btrfs_level_size(root1, level), NULL);
8165         if (ret < 0)
8166                 goto out;
8167         btrfs_init_path(&path);
8168         key.offset = 0;
8169         key.objectid = 0;
8170         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8171         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8172                                         &key, &path, 0, 0);
8173         if (ret < 0)
8174                 goto out;
8175         while(1) {
8176                 leaf = path.nodes[0];
8177                 slot = path.slots[0];
8178                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8179                         ret = btrfs_next_leaf(root, &path);
8180                         if (ret != 0)
8181                                 break;
8182                         leaf = path.nodes[0];
8183                         slot = path.slots[0];
8184                 }
8185                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8186                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8187                         unsigned long offset;
8188                         u64 last_snapshot;
8189
8190                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8191                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8192                         last_snapshot = btrfs_root_last_snapshot(&ri);
8193                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8194                                 level = btrfs_root_level(&ri);
8195                                 level_size = btrfs_level_size(root, level);
8196                                 ret = add_root_item_to_list(&normal_trees,
8197                                                 found_key.objectid,
8198                                                 btrfs_root_bytenr(&ri),
8199                                                 last_snapshot, level,
8200                                                 0, level_size, NULL);
8201                                 if (ret < 0)
8202                                         goto out;
8203                         } else {
8204                                 level = btrfs_root_level(&ri);
8205                                 level_size = btrfs_level_size(root, level);
8206                                 objectid = found_key.objectid;
8207                                 btrfs_disk_key_to_cpu(&found_key,
8208                                                       &ri.drop_progress);
8209                                 ret = add_root_item_to_list(&dropping_trees,
8210                                                 objectid,
8211                                                 btrfs_root_bytenr(&ri),
8212                                                 last_snapshot, level,
8213                                                 ri.drop_level,
8214                                                 level_size, &found_key);
8215                                 if (ret < 0)
8216                                         goto out;
8217                         }
8218                 }
8219                 path.slots[0]++;
8220         }
8221         btrfs_release_path(&path);
8222
8223         /*
8224          * check_block can return -EAGAIN if it fixes something, please keep
8225          * this in mind when dealing with return values from these functions, if
8226          * we get -EAGAIN we want to fall through and restart the loop.
8227          */
8228         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8229                                   &seen, &reada, &nodes, &extent_cache,
8230                                   &chunk_cache, &dev_cache, &block_group_cache,
8231                                   &dev_extent_cache);
8232         if (ret < 0) {
8233                 if (ret == -EAGAIN)
8234                         goto loop;
8235                 goto out;
8236         }
8237         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8238                                   &pending, &seen, &reada, &nodes,
8239                                   &extent_cache, &chunk_cache, &dev_cache,
8240                                   &block_group_cache, &dev_extent_cache);
8241         if (ret < 0) {
8242                 if (ret == -EAGAIN)
8243                         goto loop;
8244                 goto out;
8245         }
8246
8247         ret = check_chunks(&chunk_cache, &block_group_cache,
8248                            &dev_extent_cache, NULL, NULL, NULL, 0);
8249         if (ret) {
8250                 if (ret == -EAGAIN)
8251                         goto loop;
8252                 err = ret;
8253         }
8254
8255         ret = check_extent_refs(root, &extent_cache);
8256         if (ret < 0) {
8257                 if (ret == -EAGAIN)
8258                         goto loop;
8259                 goto out;
8260         }
8261
8262         ret = check_devices(&dev_cache, &dev_extent_cache);
8263         if (ret && err)
8264                 ret = err;
8265
8266 out:
8267         task_stop(ctx.info);
8268         if (repair) {
8269                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8270                 extent_io_tree_cleanup(&excluded_extents);
8271                 root->fs_info->fsck_extent_cache = NULL;
8272                 root->fs_info->free_extent_hook = NULL;
8273                 root->fs_info->corrupt_blocks = NULL;
8274                 root->fs_info->excluded_extents = NULL;
8275         }
8276         free(bits);
8277         free_chunk_cache_tree(&chunk_cache);
8278         free_device_cache_tree(&dev_cache);
8279         free_block_group_tree(&block_group_cache);
8280         free_device_extent_tree(&dev_extent_cache);
8281         free_extent_cache_tree(&seen);
8282         free_extent_cache_tree(&pending);
8283         free_extent_cache_tree(&reada);
8284         free_extent_cache_tree(&nodes);
8285         return ret;
8286 loop:
8287         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8288         free_extent_cache_tree(&seen);
8289         free_extent_cache_tree(&pending);
8290         free_extent_cache_tree(&reada);
8291         free_extent_cache_tree(&nodes);
8292         free_chunk_cache_tree(&chunk_cache);
8293         free_block_group_tree(&block_group_cache);
8294         free_device_cache_tree(&dev_cache);
8295         free_device_extent_tree(&dev_extent_cache);
8296         free_extent_record_cache(root->fs_info, &extent_cache);
8297         free_root_item_list(&normal_trees);
8298         free_root_item_list(&dropping_trees);
8299         extent_io_tree_cleanup(&excluded_extents);
8300         goto again;
8301 }
8302
8303 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8304                            struct btrfs_root *root, int overwrite)
8305 {
8306         struct extent_buffer *c;
8307         struct extent_buffer *old = root->node;
8308         int level;
8309         int ret;
8310         struct btrfs_disk_key disk_key = {0,0,0};
8311
8312         level = 0;
8313
8314         if (overwrite) {
8315                 c = old;
8316                 extent_buffer_get(c);
8317                 goto init;
8318         }
8319         c = btrfs_alloc_free_block(trans, root,
8320                                    btrfs_level_size(root, 0),
8321                                    root->root_key.objectid,
8322                                    &disk_key, level, 0, 0);
8323         if (IS_ERR(c)) {
8324                 c = old;
8325                 extent_buffer_get(c);
8326                 overwrite = 1;
8327         }
8328 init:
8329         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8330         btrfs_set_header_level(c, level);
8331         btrfs_set_header_bytenr(c, c->start);
8332         btrfs_set_header_generation(c, trans->transid);
8333         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8334         btrfs_set_header_owner(c, root->root_key.objectid);
8335
8336         write_extent_buffer(c, root->fs_info->fsid,
8337                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8338
8339         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8340                             btrfs_header_chunk_tree_uuid(c),
8341                             BTRFS_UUID_SIZE);
8342
8343         btrfs_mark_buffer_dirty(c);
8344         /*
8345          * this case can happen in the following case:
8346          *
8347          * 1.overwrite previous root.
8348          *
8349          * 2.reinit reloc data root, this is because we skip pin
8350          * down reloc data tree before which means we can allocate
8351          * same block bytenr here.
8352          */
8353         if (old->start == c->start) {
8354                 btrfs_set_root_generation(&root->root_item,
8355                                           trans->transid);
8356                 root->root_item.level = btrfs_header_level(root->node);
8357                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8358                                         &root->root_key, &root->root_item);
8359                 if (ret) {
8360                         free_extent_buffer(c);
8361                         return ret;
8362                 }
8363         }
8364         free_extent_buffer(old);
8365         root->node = c;
8366         add_root_to_dirty_list(root);
8367         return 0;
8368 }
8369
8370 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8371                                 struct extent_buffer *eb, int tree_root)
8372 {
8373         struct extent_buffer *tmp;
8374         struct btrfs_root_item *ri;
8375         struct btrfs_key key;
8376         u64 bytenr;
8377         u32 leafsize;
8378         int level = btrfs_header_level(eb);
8379         int nritems;
8380         int ret;
8381         int i;
8382
8383         /*
8384          * If we have pinned this block before, don't pin it again.
8385          * This can not only avoid forever loop with broken filesystem
8386          * but also give us some speedups.
8387          */
8388         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8389                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8390                 return 0;
8391
8392         btrfs_pin_extent(fs_info, eb->start, eb->len);
8393
8394         leafsize = btrfs_super_leafsize(fs_info->super_copy);
8395         nritems = btrfs_header_nritems(eb);
8396         for (i = 0; i < nritems; i++) {
8397                 if (level == 0) {
8398                         btrfs_item_key_to_cpu(eb, &key, i);
8399                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8400                                 continue;
8401                         /* Skip the extent root and reloc roots */
8402                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8403                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8404                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8405                                 continue;
8406                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8407                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8408
8409                         /*
8410                          * If at any point we start needing the real root we
8411                          * will have to build a stump root for the root we are
8412                          * in, but for now this doesn't actually use the root so
8413                          * just pass in extent_root.
8414                          */
8415                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8416                                               leafsize, 0);
8417                         if (!extent_buffer_uptodate(tmp)) {
8418                                 fprintf(stderr, "Error reading root block\n");
8419                                 return -EIO;
8420                         }
8421                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8422                         free_extent_buffer(tmp);
8423                         if (ret)
8424                                 return ret;
8425                 } else {
8426                         bytenr = btrfs_node_blockptr(eb, i);
8427
8428                         /* If we aren't the tree root don't read the block */
8429                         if (level == 1 && !tree_root) {
8430                                 btrfs_pin_extent(fs_info, bytenr, leafsize);
8431                                 continue;
8432                         }
8433
8434                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8435                                               leafsize, 0);
8436                         if (!extent_buffer_uptodate(tmp)) {
8437                                 fprintf(stderr, "Error reading tree block\n");
8438                                 return -EIO;
8439                         }
8440                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8441                         free_extent_buffer(tmp);
8442                         if (ret)
8443                                 return ret;
8444                 }
8445         }
8446
8447         return 0;
8448 }
8449
8450 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8451 {
8452         int ret;
8453
8454         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8455         if (ret)
8456                 return ret;
8457
8458         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8459 }
8460
8461 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8462 {
8463         struct btrfs_block_group_cache *cache;
8464         struct btrfs_path *path;
8465         struct extent_buffer *leaf;
8466         struct btrfs_chunk *chunk;
8467         struct btrfs_key key;
8468         int ret;
8469         u64 start;
8470
8471         path = btrfs_alloc_path();
8472         if (!path)
8473                 return -ENOMEM;
8474
8475         key.objectid = 0;
8476         key.type = BTRFS_CHUNK_ITEM_KEY;
8477         key.offset = 0;
8478
8479         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8480         if (ret < 0) {
8481                 btrfs_free_path(path);
8482                 return ret;
8483         }
8484
8485         /*
8486          * We do this in case the block groups were screwed up and had alloc
8487          * bits that aren't actually set on the chunks.  This happens with
8488          * restored images every time and could happen in real life I guess.
8489          */
8490         fs_info->avail_data_alloc_bits = 0;
8491         fs_info->avail_metadata_alloc_bits = 0;
8492         fs_info->avail_system_alloc_bits = 0;
8493
8494         /* First we need to create the in-memory block groups */
8495         while (1) {
8496                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8497                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
8498                         if (ret < 0) {
8499                                 btrfs_free_path(path);
8500                                 return ret;
8501                         }
8502                         if (ret) {
8503                                 ret = 0;
8504                                 break;
8505                         }
8506                 }
8507                 leaf = path->nodes[0];
8508                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8509                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8510                         path->slots[0]++;
8511                         continue;
8512                 }
8513
8514                 chunk = btrfs_item_ptr(leaf, path->slots[0],
8515                                        struct btrfs_chunk);
8516                 btrfs_add_block_group(fs_info, 0,
8517                                       btrfs_chunk_type(leaf, chunk),
8518                                       key.objectid, key.offset,
8519                                       btrfs_chunk_length(leaf, chunk));
8520                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8521                                  key.offset + btrfs_chunk_length(leaf, chunk),
8522                                  GFP_NOFS);
8523                 path->slots[0]++;
8524         }
8525         start = 0;
8526         while (1) {
8527                 cache = btrfs_lookup_first_block_group(fs_info, start);
8528                 if (!cache)
8529                         break;
8530                 cache->cached = 1;
8531                 start = cache->key.objectid + cache->key.offset;
8532         }
8533
8534         btrfs_free_path(path);
8535         return 0;
8536 }
8537
8538 static int reset_balance(struct btrfs_trans_handle *trans,
8539                          struct btrfs_fs_info *fs_info)
8540 {
8541         struct btrfs_root *root = fs_info->tree_root;
8542         struct btrfs_path *path;
8543         struct extent_buffer *leaf;
8544         struct btrfs_key key;
8545         int del_slot, del_nr = 0;
8546         int ret;
8547         int found = 0;
8548
8549         path = btrfs_alloc_path();
8550         if (!path)
8551                 return -ENOMEM;
8552
8553         key.objectid = BTRFS_BALANCE_OBJECTID;
8554         key.type = BTRFS_BALANCE_ITEM_KEY;
8555         key.offset = 0;
8556
8557         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8558         if (ret) {
8559                 if (ret > 0)
8560                         ret = 0;
8561                 if (!ret)
8562                         goto reinit_data_reloc;
8563                 else
8564                         goto out;
8565         }
8566
8567         ret = btrfs_del_item(trans, root, path);
8568         if (ret)
8569                 goto out;
8570         btrfs_release_path(path);
8571
8572         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8573         key.type = BTRFS_ROOT_ITEM_KEY;
8574         key.offset = 0;
8575
8576         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8577         if (ret < 0)
8578                 goto out;
8579         while (1) {
8580                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8581                         if (!found)
8582                                 break;
8583
8584                         if (del_nr) {
8585                                 ret = btrfs_del_items(trans, root, path,
8586                                                       del_slot, del_nr);
8587                                 del_nr = 0;
8588                                 if (ret)
8589                                         goto out;
8590                         }
8591                         key.offset++;
8592                         btrfs_release_path(path);
8593
8594                         found = 0;
8595                         ret = btrfs_search_slot(trans, root, &key, path,
8596                                                 -1, 1);
8597                         if (ret < 0)
8598                                 goto out;
8599                         continue;
8600                 }
8601                 found = 1;
8602                 leaf = path->nodes[0];
8603                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8604                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8605                         break;
8606                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8607                         path->slots[0]++;
8608                         continue;
8609                 }
8610                 if (!del_nr) {
8611                         del_slot = path->slots[0];
8612                         del_nr = 1;
8613                 } else {
8614                         del_nr++;
8615                 }
8616                 path->slots[0]++;
8617         }
8618
8619         if (del_nr) {
8620                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8621                 if (ret)
8622                         goto out;
8623         }
8624         btrfs_release_path(path);
8625
8626 reinit_data_reloc:
8627         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8628         key.type = BTRFS_ROOT_ITEM_KEY;
8629         key.offset = (u64)-1;
8630         root = btrfs_read_fs_root(fs_info, &key);
8631         if (IS_ERR(root)) {
8632                 fprintf(stderr, "Error reading data reloc tree\n");
8633                 ret = PTR_ERR(root);
8634                 goto out;
8635         }
8636         record_root_in_trans(trans, root);
8637         ret = btrfs_fsck_reinit_root(trans, root, 0);
8638         if (ret)
8639                 goto out;
8640         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8641 out:
8642         btrfs_free_path(path);
8643         return ret;
8644 }
8645
8646 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8647                               struct btrfs_fs_info *fs_info)
8648 {
8649         u64 start = 0;
8650         int ret;
8651
8652         /*
8653          * The only reason we don't do this is because right now we're just
8654          * walking the trees we find and pinning down their bytes, we don't look
8655          * at any of the leaves.  In order to do mixed groups we'd have to check
8656          * the leaves of any fs roots and pin down the bytes for any file
8657          * extents we find.  Not hard but why do it if we don't have to?
8658          */
8659         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8660                 fprintf(stderr, "We don't support re-initing the extent tree "
8661                         "for mixed block groups yet, please notify a btrfs "
8662                         "developer you want to do this so they can add this "
8663                         "functionality.\n");
8664                 return -EINVAL;
8665         }
8666
8667         /*
8668          * first we need to walk all of the trees except the extent tree and pin
8669          * down the bytes that are in use so we don't overwrite any existing
8670          * metadata.
8671          */
8672         ret = pin_metadata_blocks(fs_info);
8673         if (ret) {
8674                 fprintf(stderr, "error pinning down used bytes\n");
8675                 return ret;
8676         }
8677
8678         /*
8679          * Need to drop all the block groups since we're going to recreate all
8680          * of them again.
8681          */
8682         btrfs_free_block_groups(fs_info);
8683         ret = reset_block_groups(fs_info);
8684         if (ret) {
8685                 fprintf(stderr, "error resetting the block groups\n");
8686                 return ret;
8687         }
8688
8689         /* Ok we can allocate now, reinit the extent root */
8690         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8691         if (ret) {
8692                 fprintf(stderr, "extent root initialization failed\n");
8693                 /*
8694                  * When the transaction code is updated we should end the
8695                  * transaction, but for now progs only knows about commit so
8696                  * just return an error.
8697                  */
8698                 return ret;
8699         }
8700
8701         /*
8702          * Now we have all the in-memory block groups setup so we can make
8703          * allocations properly, and the metadata we care about is safe since we
8704          * pinned all of it above.
8705          */
8706         while (1) {
8707                 struct btrfs_block_group_cache *cache;
8708
8709                 cache = btrfs_lookup_first_block_group(fs_info, start);
8710                 if (!cache)
8711                         break;
8712                 start = cache->key.objectid + cache->key.offset;
8713                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8714                                         &cache->key, &cache->item,
8715                                         sizeof(cache->item));
8716                 if (ret) {
8717                         fprintf(stderr, "Error adding block group\n");
8718                         return ret;
8719                 }
8720                 btrfs_extent_post_op(trans, fs_info->extent_root);
8721         }
8722
8723         ret = reset_balance(trans, fs_info);
8724         if (ret)
8725                 fprintf(stderr, "error reseting the pending balance\n");
8726
8727         return ret;
8728 }
8729
8730 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8731 {
8732         struct btrfs_path *path;
8733         struct btrfs_trans_handle *trans;
8734         struct btrfs_key key;
8735         int ret;
8736
8737         printf("Recowing metadata block %llu\n", eb->start);
8738         key.objectid = btrfs_header_owner(eb);
8739         key.type = BTRFS_ROOT_ITEM_KEY;
8740         key.offset = (u64)-1;
8741
8742         root = btrfs_read_fs_root(root->fs_info, &key);
8743         if (IS_ERR(root)) {
8744                 fprintf(stderr, "Couldn't find owner root %llu\n",
8745                         key.objectid);
8746                 return PTR_ERR(root);
8747         }
8748
8749         path = btrfs_alloc_path();
8750         if (!path)
8751                 return -ENOMEM;
8752
8753         trans = btrfs_start_transaction(root, 1);
8754         if (IS_ERR(trans)) {
8755                 btrfs_free_path(path);
8756                 return PTR_ERR(trans);
8757         }
8758
8759         path->lowest_level = btrfs_header_level(eb);
8760         if (path->lowest_level)
8761                 btrfs_node_key_to_cpu(eb, &key, 0);
8762         else
8763                 btrfs_item_key_to_cpu(eb, &key, 0);
8764
8765         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8766         btrfs_commit_transaction(trans, root);
8767         btrfs_free_path(path);
8768         return ret;
8769 }
8770
8771 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8772 {
8773         struct btrfs_path *path;
8774         struct btrfs_trans_handle *trans;
8775         struct btrfs_key key;
8776         int ret;
8777
8778         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8779                bad->key.type, bad->key.offset);
8780         key.objectid = bad->root_id;
8781         key.type = BTRFS_ROOT_ITEM_KEY;
8782         key.offset = (u64)-1;
8783
8784         root = btrfs_read_fs_root(root->fs_info, &key);
8785         if (IS_ERR(root)) {
8786                 fprintf(stderr, "Couldn't find owner root %llu\n",
8787                         key.objectid);
8788                 return PTR_ERR(root);
8789         }
8790
8791         path = btrfs_alloc_path();
8792         if (!path)
8793                 return -ENOMEM;
8794
8795         trans = btrfs_start_transaction(root, 1);
8796         if (IS_ERR(trans)) {
8797                 btrfs_free_path(path);
8798                 return PTR_ERR(trans);
8799         }
8800
8801         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8802         if (ret) {
8803                 if (ret > 0)
8804                         ret = 0;
8805                 goto out;
8806         }
8807         ret = btrfs_del_item(trans, root, path);
8808 out:
8809         btrfs_commit_transaction(trans, root);
8810         btrfs_free_path(path);
8811         return ret;
8812 }
8813
8814 static int zero_log_tree(struct btrfs_root *root)
8815 {
8816         struct btrfs_trans_handle *trans;
8817         int ret;
8818
8819         trans = btrfs_start_transaction(root, 1);
8820         if (IS_ERR(trans)) {
8821                 ret = PTR_ERR(trans);
8822                 return ret;
8823         }
8824         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8825         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8826         ret = btrfs_commit_transaction(trans, root);
8827         return ret;
8828 }
8829
8830 static int populate_csum(struct btrfs_trans_handle *trans,
8831                          struct btrfs_root *csum_root, char *buf, u64 start,
8832                          u64 len)
8833 {
8834         u64 offset = 0;
8835         u64 sectorsize;
8836         int ret = 0;
8837
8838         while (offset < len) {
8839                 sectorsize = csum_root->sectorsize;
8840                 ret = read_extent_data(csum_root, buf, start + offset,
8841                                        &sectorsize, 0);
8842                 if (ret)
8843                         break;
8844                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8845                                             start + offset, buf, sectorsize);
8846                 if (ret)
8847                         break;
8848                 offset += sectorsize;
8849         }
8850         return ret;
8851 }
8852
8853 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8854                                       struct btrfs_root *csum_root,
8855                                       struct btrfs_root *cur_root)
8856 {
8857         struct btrfs_path *path;
8858         struct btrfs_key key;
8859         struct extent_buffer *node;
8860         struct btrfs_file_extent_item *fi;
8861         char *buf = NULL;
8862         u64 start = 0;
8863         u64 len = 0;
8864         int slot = 0;
8865         int ret = 0;
8866
8867         path = btrfs_alloc_path();
8868         if (!path)
8869                 return -ENOMEM;
8870         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8871         if (!buf) {
8872                 ret = -ENOMEM;
8873                 goto out;
8874         }
8875
8876         key.objectid = 0;
8877         key.offset = 0;
8878         key.type = 0;
8879
8880         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8881         if (ret < 0)
8882                 goto out;
8883         /* Iterate all regular file extents and fill its csum */
8884         while (1) {
8885                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8886
8887                 if (key.type != BTRFS_EXTENT_DATA_KEY)
8888                         goto next;
8889                 node = path->nodes[0];
8890                 slot = path->slots[0];
8891                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8892                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8893                         goto next;
8894                 start = btrfs_file_extent_disk_bytenr(node, fi);
8895                 len = btrfs_file_extent_disk_num_bytes(node, fi);
8896
8897                 ret = populate_csum(trans, csum_root, buf, start, len);
8898                 if (ret == -EEXIST)
8899                         ret = 0;
8900                 if (ret < 0)
8901                         goto out;
8902 next:
8903                 /*
8904                  * TODO: if next leaf is corrupted, jump to nearest next valid
8905                  * leaf.
8906                  */
8907                 ret = btrfs_next_item(cur_root, path);
8908                 if (ret < 0)
8909                         goto out;
8910                 if (ret > 0) {
8911                         ret = 0;
8912                         goto out;
8913                 }
8914         }
8915
8916 out:
8917         btrfs_free_path(path);
8918         free(buf);
8919         return ret;
8920 }
8921
8922 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8923                                   struct btrfs_root *csum_root)
8924 {
8925         struct btrfs_fs_info *fs_info = csum_root->fs_info;
8926         struct btrfs_path *path;
8927         struct btrfs_root *tree_root = fs_info->tree_root;
8928         struct btrfs_root *cur_root;
8929         struct extent_buffer *node;
8930         struct btrfs_key key;
8931         int slot = 0;
8932         int ret = 0;
8933
8934         path = btrfs_alloc_path();
8935         if (!path)
8936                 return -ENOMEM;
8937
8938         key.objectid = BTRFS_FS_TREE_OBJECTID;
8939         key.offset = 0;
8940         key.type = BTRFS_ROOT_ITEM_KEY;
8941
8942         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
8943         if (ret < 0)
8944                 goto out;
8945         if (ret > 0) {
8946                 ret = -ENOENT;
8947                 goto out;
8948         }
8949
8950         while (1) {
8951                 node = path->nodes[0];
8952                 slot = path->slots[0];
8953                 btrfs_item_key_to_cpu(node, &key, slot);
8954                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8955                         goto out;
8956                 if (key.type != BTRFS_ROOT_ITEM_KEY)
8957                         goto next;
8958                 if (!is_fstree(key.objectid))
8959                         goto next;
8960                 key.offset = (u64)-1;
8961
8962                 cur_root = btrfs_read_fs_root(fs_info, &key);
8963                 if (IS_ERR(cur_root) || !cur_root) {
8964                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8965                                 key.objectid);
8966                         goto out;
8967                 }
8968                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8969                                 cur_root);
8970                 if (ret < 0)
8971                         goto out;
8972 next:
8973                 ret = btrfs_next_item(tree_root, path);
8974                 if (ret > 0) {
8975                         ret = 0;
8976                         goto out;
8977                 }
8978                 if (ret < 0)
8979                         goto out;
8980         }
8981
8982 out:
8983         btrfs_free_path(path);
8984         return ret;
8985 }
8986
8987 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
8988                                       struct btrfs_root *csum_root)
8989 {
8990         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
8991         struct btrfs_path *path;
8992         struct btrfs_extent_item *ei;
8993         struct extent_buffer *leaf;
8994         char *buf;
8995         struct btrfs_key key;
8996         int ret;
8997
8998         path = btrfs_alloc_path();
8999         if (!path)
9000                 return -ENOMEM;
9001
9002         key.objectid = 0;
9003         key.type = BTRFS_EXTENT_ITEM_KEY;
9004         key.offset = 0;
9005
9006         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9007         if (ret < 0) {
9008                 btrfs_free_path(path);
9009                 return ret;
9010         }
9011
9012         buf = malloc(csum_root->sectorsize);
9013         if (!buf) {
9014                 btrfs_free_path(path);
9015                 return -ENOMEM;
9016         }
9017
9018         while (1) {
9019                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9020                         ret = btrfs_next_leaf(extent_root, path);
9021                         if (ret < 0)
9022                                 break;
9023                         if (ret) {
9024                                 ret = 0;
9025                                 break;
9026                         }
9027                 }
9028                 leaf = path->nodes[0];
9029
9030                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9031                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9032                         path->slots[0]++;
9033                         continue;
9034                 }
9035
9036                 ei = btrfs_item_ptr(leaf, path->slots[0],
9037                                     struct btrfs_extent_item);
9038                 if (!(btrfs_extent_flags(leaf, ei) &
9039                       BTRFS_EXTENT_FLAG_DATA)) {
9040                         path->slots[0]++;
9041                         continue;
9042                 }
9043
9044                 ret = populate_csum(trans, csum_root, buf, key.objectid,
9045                                     key.offset);
9046                 if (ret)
9047                         break;
9048                 path->slots[0]++;
9049         }
9050
9051         btrfs_free_path(path);
9052         free(buf);
9053         return ret;
9054 }
9055
/*
 * Recompute checksums and insert them into the csum tree.
 *
 * Initializing the extent tree wipes all extent info, so in that case the
 * extent tree cannot be used as the source.  When search_fs_tree is non-zero
 * the fs/subvolume trees are walked instead; otherwise the extent tree is.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
9072
/*
 * What the extent tree says about one root: the highest-level tree block
 * referenced by that root and how many blocks exist at that level.  Filled
 * in by build_roots_info_cache() and compared against the on-disk root item
 * by maybe_repair_root_item().
 */
struct root_item_info {
        /* level of the root; (u8)-1 until the first block is recorded */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        /* key objectid of the extent item of the block recorded at 'level' */
        u64 bytenr;
        /* generation read from that same extent item */
        u64 gen;
        /* linkage in roots_info_cache: start is the root id, size is 1 */
        struct cache_extent cache_extent;
};
9082
9083 static struct cache_tree *roots_info_cache = NULL;
9084
9085 static void free_roots_info_cache(void)
9086 {
9087         if (!roots_info_cache)
9088                 return;
9089
9090         while (!cache_tree_empty(roots_info_cache)) {
9091                 struct cache_extent *entry;
9092                 struct root_item_info *rii;
9093
9094                 entry = first_cache_extent(roots_info_cache);
9095                 if (!entry)
9096                         break;
9097                 remove_cache_extent(roots_info_cache, entry);
9098                 rii = container_of(entry, struct root_item_info, cache_extent);
9099                 free(rii);
9100         }
9101
9102         free(roots_info_cache);
9103         roots_info_cache = NULL;
9104 }
9105
9106 static int build_roots_info_cache(struct btrfs_fs_info *info)
9107 {
9108         int ret = 0;
9109         struct btrfs_key key;
9110         struct extent_buffer *leaf;
9111         struct btrfs_path *path;
9112
9113         if (!roots_info_cache) {
9114                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9115                 if (!roots_info_cache)
9116                         return -ENOMEM;
9117                 cache_tree_init(roots_info_cache);
9118         }
9119
9120         path = btrfs_alloc_path();
9121         if (!path)
9122                 return -ENOMEM;
9123
9124         key.objectid = 0;
9125         key.type = BTRFS_EXTENT_ITEM_KEY;
9126         key.offset = 0;
9127
9128         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9129         if (ret < 0)
9130                 goto out;
9131         leaf = path->nodes[0];
9132
9133         while (1) {
9134                 struct btrfs_key found_key;
9135                 struct btrfs_extent_item *ei;
9136                 struct btrfs_extent_inline_ref *iref;
9137                 int slot = path->slots[0];
9138                 int type;
9139                 u64 flags;
9140                 u64 root_id;
9141                 u8 level;
9142                 struct cache_extent *entry;
9143                 struct root_item_info *rii;
9144
9145                 if (slot >= btrfs_header_nritems(leaf)) {
9146                         ret = btrfs_next_leaf(info->extent_root, path);
9147                         if (ret < 0) {
9148                                 break;
9149                         } else if (ret) {
9150                                 ret = 0;
9151                                 break;
9152                         }
9153                         leaf = path->nodes[0];
9154                         slot = path->slots[0];
9155                 }
9156
9157                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9158
9159                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9160                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9161                         goto next;
9162
9163                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9164                 flags = btrfs_extent_flags(leaf, ei);
9165
9166                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9167                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9168                         goto next;
9169
9170                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9171                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9172                         level = found_key.offset;
9173                 } else {
9174                         struct btrfs_tree_block_info *info;
9175
9176                         info = (struct btrfs_tree_block_info *)(ei + 1);
9177                         iref = (struct btrfs_extent_inline_ref *)(info + 1);
9178                         level = btrfs_tree_block_level(leaf, info);
9179                 }
9180
9181                 /*
9182                  * For a root extent, it must be of the following type and the
9183                  * first (and only one) iref in the item.
9184                  */
9185                 type = btrfs_extent_inline_ref_type(leaf, iref);
9186                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9187                         goto next;
9188
9189                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9190                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9191                 if (!entry) {
9192                         rii = malloc(sizeof(struct root_item_info));
9193                         if (!rii) {
9194                                 ret = -ENOMEM;
9195                                 goto out;
9196                         }
9197                         rii->cache_extent.start = root_id;
9198                         rii->cache_extent.size = 1;
9199                         rii->level = (u8)-1;
9200                         entry = &rii->cache_extent;
9201                         ret = insert_cache_extent(roots_info_cache, entry);
9202                         ASSERT(ret == 0);
9203                 } else {
9204                         rii = container_of(entry, struct root_item_info,
9205                                            cache_extent);
9206                 }
9207
9208                 ASSERT(rii->cache_extent.start == root_id);
9209                 ASSERT(rii->cache_extent.size == 1);
9210
9211                 if (level > rii->level || rii->level == (u8)-1) {
9212                         rii->level = level;
9213                         rii->bytenr = found_key.objectid;
9214                         rii->gen = btrfs_extent_generation(leaf, ei);
9215                         rii->node_count = 1;
9216                 } else if (level == rii->level) {
9217                         rii->node_count++;
9218                 }
9219 next:
9220                 path->slots[0]++;
9221         }
9222
9223 out:
9224         btrfs_free_path(path);
9225
9226         return ret;
9227 }
9228
9229 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9230                                   struct btrfs_path *path,
9231                                   const struct btrfs_key *root_key,
9232                                   const int read_only_mode)
9233 {
9234         const u64 root_id = root_key->objectid;
9235         struct cache_extent *entry;
9236         struct root_item_info *rii;
9237         struct btrfs_root_item ri;
9238         unsigned long offset;
9239
9240         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9241         if (!entry) {
9242                 fprintf(stderr,
9243                         "Error: could not find extent items for root %llu\n",
9244                         root_key->objectid);
9245                 return -ENOENT;
9246         }
9247
9248         rii = container_of(entry, struct root_item_info, cache_extent);
9249         ASSERT(rii->cache_extent.start == root_id);
9250         ASSERT(rii->cache_extent.size == 1);
9251
9252         if (rii->node_count != 1) {
9253                 fprintf(stderr,
9254                         "Error: could not find btree root extent for root %llu\n",
9255                         root_id);
9256                 return -ENOENT;
9257         }
9258
9259         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9260         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9261
9262         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9263             btrfs_root_level(&ri) != rii->level ||
9264             btrfs_root_generation(&ri) != rii->gen) {
9265
9266                 /*
9267                  * If we're in repair mode but our caller told us to not update
9268                  * the root item, i.e. just check if it needs to be updated, don't
9269                  * print this message, since the caller will call us again shortly
9270                  * for the same root item without read only mode (the caller will
9271                  * open a transaction first).
9272                  */
9273                 if (!(read_only_mode && repair))
9274                         fprintf(stderr,
9275                                 "%sroot item for root %llu,"
9276                                 " current bytenr %llu, current gen %llu, current level %u,"
9277                                 " new bytenr %llu, new gen %llu, new level %u\n",
9278                                 (read_only_mode ? "" : "fixing "),
9279                                 root_id,
9280                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9281                                 btrfs_root_level(&ri),
9282                                 rii->bytenr, rii->gen, rii->level);
9283
9284                 if (btrfs_root_generation(&ri) > rii->gen) {
9285                         fprintf(stderr,
9286                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9287                                 root_id, btrfs_root_generation(&ri), rii->gen);
9288                         return -EINVAL;
9289                 }
9290
9291                 if (!read_only_mode) {
9292                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9293                         btrfs_set_root_level(&ri, rii->level);
9294                         btrfs_set_root_generation(&ri, rii->gen);
9295                         write_extent_buffer(path->nodes[0], &ri,
9296                                             offset, sizeof(ri));
9297                 }
9298
9299                 return 1;
9300         }
9301
9302         return 0;
9303 }
9304
9305 /*
9306  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9307  * caused read-only snapshots to be corrupted if they were created at a moment
9308  * when the source subvolume/snapshot had orphan items. The issue was that the
9309  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9310  * node instead of the post orphan cleanup root node.
9311  * So this function, and its callees, just detects and fixes those cases. Even
9312  * though the regression was for read-only snapshots, this function applies to
9313  * any snapshot/subvolume root.
9314  * This must be run before any other repair code - not doing it so, makes other
9315  * repair code delete or modify backrefs in the extent tree for example, which
9316  * will result in an inconsistent fs after repairing the root items.
9317  */
9318 static int repair_root_items(struct btrfs_fs_info *info)
9319 {
9320         struct btrfs_path *path = NULL;
9321         struct btrfs_key key;
9322         struct extent_buffer *leaf;
9323         struct btrfs_trans_handle *trans = NULL;
9324         int ret = 0;
9325         int bad_roots = 0;
9326         int need_trans = 0;
9327
9328         ret = build_roots_info_cache(info);
9329         if (ret)
9330                 goto out;
9331
9332         path = btrfs_alloc_path();
9333         if (!path) {
9334                 ret = -ENOMEM;
9335                 goto out;
9336         }
9337
9338         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9339         key.type = BTRFS_ROOT_ITEM_KEY;
9340         key.offset = 0;
9341
9342 again:
9343         /*
9344          * Avoid opening and committing transactions if a leaf doesn't have
9345          * any root items that need to be fixed, so that we avoid rotating
9346          * backup roots unnecessarily.
9347          */
9348         if (need_trans) {
9349                 trans = btrfs_start_transaction(info->tree_root, 1);
9350                 if (IS_ERR(trans)) {
9351                         ret = PTR_ERR(trans);
9352                         goto out;
9353                 }
9354         }
9355
9356         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9357                                 0, trans ? 1 : 0);
9358         if (ret < 0)
9359                 goto out;
9360         leaf = path->nodes[0];
9361
9362         while (1) {
9363                 struct btrfs_key found_key;
9364
9365                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9366                         int no_more_keys = find_next_key(path, &key);
9367
9368                         btrfs_release_path(path);
9369                         if (trans) {
9370                                 ret = btrfs_commit_transaction(trans,
9371                                                                info->tree_root);
9372                                 trans = NULL;
9373                                 if (ret < 0)
9374                                         goto out;
9375                         }
9376                         need_trans = 0;
9377                         if (no_more_keys)
9378                                 break;
9379                         goto again;
9380                 }
9381
9382                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9383
9384                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9385                         goto next;
9386                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9387                         goto next;
9388
9389                 ret = maybe_repair_root_item(info, path, &found_key,
9390                                              trans ? 0 : 1);
9391                 if (ret < 0)
9392                         goto out;
9393                 if (ret) {
9394                         if (!trans && repair) {
9395                                 need_trans = 1;
9396                                 key = found_key;
9397                                 btrfs_release_path(path);
9398                                 goto again;
9399                         }
9400                         bad_roots++;
9401                 }
9402 next:
9403                 path->slots[0]++;
9404         }
9405         ret = 0;
9406 out:
9407         free_roots_info_cache();
9408         btrfs_free_path(path);
9409         if (trans)
9410                 btrfs_commit_transaction(trans, info->tree_root);
9411         if (ret < 0)
9412                 return ret;
9413
9414         return bad_roots;
9415 }
9416
/*
 * Help text for "btrfs check", printed through usage() by cmd_check().
 * The array is NULL-terminated.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found.",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "-p|--progress               indicate progress",
        NULL
};
9439
9440 int cmd_check(int argc, char **argv)
9441 {
9442         struct cache_tree root_cache;
9443         struct btrfs_root *root;
9444         struct btrfs_fs_info *info;
9445         u64 bytenr = 0;
9446         u64 subvolid = 0;
9447         u64 tree_root_bytenr = 0;
9448         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9449         int ret;
9450         u64 num;
9451         int init_csum_tree = 0;
9452         int readonly = 0;
9453         int qgroup_report = 0;
9454         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9455
9456         while(1) {
9457                 int c;
9458                 enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT,
9459                         OPT_CHECK_CSUM, OPT_READONLY };
9460                 static const struct option long_options[] = {
9461                         { "super", required_argument, NULL, 's' },
9462                         { "repair", no_argument, NULL, OPT_REPAIR },
9463                         { "readonly", no_argument, NULL, OPT_READONLY },
9464                         { "init-csum-tree", no_argument, NULL, OPT_INIT_CSUM },
9465                         { "init-extent-tree", no_argument, NULL, OPT_INIT_EXTENT },
9466                         { "check-data-csum", no_argument, NULL, OPT_CHECK_CSUM },
9467                         { "backup", no_argument, NULL, 'b' },
9468                         { "subvol-extents", required_argument, NULL, 'E' },
9469                         { "qgroup-report", no_argument, NULL, 'Q' },
9470                         { "tree-root", required_argument, NULL, 'r' },
9471                         { "progress", no_argument, NULL, 'p' },
9472                         { NULL, 0, NULL, 0}
9473                 };
9474
9475                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9476                 if (c < 0)
9477                         break;
9478                 switch(c) {
9479                         case 'a': /* ignored */ break;
9480                         case 'b':
9481                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9482                                 break;
9483                         case 's':
9484                                 num = arg_strtou64(optarg);
9485                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9486                                         fprintf(stderr,
9487                                                 "ERROR: super mirror should be less than: %d\n",
9488                                                 BTRFS_SUPER_MIRROR_MAX);
9489                                         exit(1);
9490                                 }
9491                                 bytenr = btrfs_sb_offset(((int)num));
9492                                 printf("using SB copy %llu, bytenr %llu\n", num,
9493                                        (unsigned long long)bytenr);
9494                                 break;
9495                         case 'Q':
9496                                 qgroup_report = 1;
9497                                 break;
9498                         case 'E':
9499                                 subvolid = arg_strtou64(optarg);
9500                                 break;
9501                         case 'r':
9502                                 tree_root_bytenr = arg_strtou64(optarg);
9503                                 break;
9504                         case 'p':
9505                                 ctx.progress_enabled = true;
9506                                 break;
9507                         case '?':
9508                         case 'h':
9509                                 usage(cmd_check_usage);
9510                         case OPT_REPAIR:
9511                                 printf("enabling repair mode\n");
9512                                 repair = 1;
9513                                 ctree_flags |= OPEN_CTREE_WRITES;
9514                                 break;
9515                         case OPT_READONLY:
9516                                 readonly = 1;
9517                                 break;
9518                         case OPT_INIT_CSUM:
9519                                 printf("Creating a new CRC tree\n");
9520                                 init_csum_tree = 1;
9521                                 repair = 1;
9522                                 ctree_flags |= OPEN_CTREE_WRITES;
9523                                 break;
9524                         case OPT_INIT_EXTENT:
9525                                 init_extent_tree = 1;
9526                                 ctree_flags |= (OPEN_CTREE_WRITES |
9527                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9528                                 repair = 1;
9529                                 break;
9530                         case OPT_CHECK_CSUM:
9531                                 check_data_csum = 1;
9532                                 break;
9533                 }
9534         }
9535         argc = argc - optind;
9536
9537         if (check_argc_exact(argc, 1))
9538                 usage(cmd_check_usage);
9539
9540         if (ctx.progress_enabled) {
9541                 ctx.tp = TASK_NOTHING;
9542                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9543         }
9544
9545         /* This check is the only reason for --readonly to exist */
9546         if (readonly && repair) {
9547                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9548                 exit(1);
9549         }
9550
9551         radix_tree_init();
9552         cache_tree_init(&root_cache);
9553
9554         if((ret = check_mounted(argv[optind])) < 0) {
9555                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9556                 goto err_out;
9557         } else if(ret) {
9558                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9559                 ret = -EBUSY;
9560                 goto err_out;
9561         }
9562
9563         /* only allow partial opening under repair mode */
9564         if (repair)
9565                 ctree_flags |= OPEN_CTREE_PARTIAL;
9566
9567         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9568                                   ctree_flags);
9569         if (!info) {
9570                 fprintf(stderr, "Couldn't open file system\n");
9571                 ret = -EIO;
9572                 goto err_out;
9573         }
9574
9575         global_info = info;
9576         root = info->fs_root;
9577
9578         /*
9579          * repair mode will force us to commit transaction which
9580          * will make us fail to load log tree when mounting.
9581          */
9582         if (repair && btrfs_super_log_root(info->super_copy)) {
9583                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9584                 if (!ret) {
9585                         ret = 1;
9586                         goto close_out;
9587                 }
9588                 ret = zero_log_tree(root);
9589                 if (ret) {
9590                         fprintf(stderr, "fail to zero log tree\n");
9591                         goto close_out;
9592                 }
9593         }
9594
9595         uuid_unparse(info->super_copy->fsid, uuidbuf);
9596         if (qgroup_report) {
9597                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9598                        uuidbuf);
9599                 ret = qgroup_verify_all(info);
9600                 if (ret == 0)
9601                         print_qgroup_report(1);
9602                 goto close_out;
9603         }
9604         if (subvolid) {
9605                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9606                        subvolid, argv[optind], uuidbuf);
9607                 ret = print_extent_state(info, subvolid);
9608                 goto close_out;
9609         }
9610         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9611
9612         if (!extent_buffer_uptodate(info->tree_root->node) ||
9613             !extent_buffer_uptodate(info->dev_root->node) ||
9614             !extent_buffer_uptodate(info->chunk_root->node)) {
9615                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9616                 ret = -EIO;
9617                 goto close_out;
9618         }
9619
9620         if (init_extent_tree || init_csum_tree) {
9621                 struct btrfs_trans_handle *trans;
9622
9623                 trans = btrfs_start_transaction(info->extent_root, 0);
9624                 if (IS_ERR(trans)) {
9625                         fprintf(stderr, "Error starting transaction\n");
9626                         ret = PTR_ERR(trans);
9627                         goto close_out;
9628                 }
9629
9630                 if (init_extent_tree) {
9631                         printf("Creating a new extent tree\n");
9632                         ret = reinit_extent_tree(trans, info);
9633                         if (ret)
9634                                 goto close_out;
9635                 }
9636
9637                 if (init_csum_tree) {
9638                         fprintf(stderr, "Reinit crc root\n");
9639                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9640                         if (ret) {
9641                                 fprintf(stderr, "crc root initialization failed\n");
9642                                 ret = -EIO;
9643                                 goto close_out;
9644                         }
9645
9646                         ret = fill_csum_tree(trans, info->csum_root,
9647                                              init_extent_tree);
9648                         if (ret) {
9649                                 fprintf(stderr, "crc refilling failed\n");
9650                                 return -EIO;
9651                         }
9652                 }
9653                 /*
9654                  * Ok now we commit and run the normal fsck, which will add
9655                  * extent entries for all of the items it finds.
9656                  */
9657                 ret = btrfs_commit_transaction(trans, info->extent_root);
9658                 if (ret)
9659                         goto close_out;
9660         }
9661         if (!extent_buffer_uptodate(info->extent_root->node)) {
9662                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9663                 ret = -EIO;
9664                 goto close_out;
9665         }
9666         if (!extent_buffer_uptodate(info->csum_root->node)) {
9667                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9668                 ret = -EIO;
9669                 goto close_out;
9670         }
9671
9672         if (!ctx.progress_enabled)
9673                 fprintf(stderr, "checking extents\n");
9674         ret = check_chunks_and_extents(root);
9675         if (ret)
9676                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9677
9678         ret = repair_root_items(info);
9679         if (ret < 0)
9680                 goto close_out;
9681         if (repair) {
9682                 fprintf(stderr, "Fixed %d roots.\n", ret);
9683                 ret = 0;
9684         } else if (ret > 0) {
9685                 fprintf(stderr,
9686                        "Found %d roots with an outdated root item.\n",
9687                        ret);
9688                 fprintf(stderr,
9689                         "Please run a filesystem check with the option --repair to fix them.\n");
9690                 ret = 1;
9691                 goto close_out;
9692         }
9693
9694         if (!ctx.progress_enabled)
9695                 fprintf(stderr, "checking free space cache\n");
9696         ret = check_space_cache(root);
9697         if (ret)
9698                 goto out;
9699
9700         /*
9701          * We used to have to have these hole extents in between our real
9702          * extents so if we don't have this flag set we need to make sure there
9703          * are no gaps in the file extents for inodes, otherwise we can just
9704          * ignore it when this happens.
9705          */
9706         no_holes = btrfs_fs_incompat(root->fs_info,
9707                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9708         if (!ctx.progress_enabled)
9709                 fprintf(stderr, "checking fs roots\n");
9710         ret = check_fs_roots(root, &root_cache);
9711         if (ret)
9712                 goto out;
9713
9714         fprintf(stderr, "checking csums\n");
9715         ret = check_csums(root);
9716         if (ret)
9717                 goto out;
9718
9719         fprintf(stderr, "checking root refs\n");
9720         ret = check_root_refs(root, &root_cache);
9721         if (ret)
9722                 goto out;
9723
9724         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9725                 struct extent_buffer *eb;
9726
9727                 eb = list_first_entry(&root->fs_info->recow_ebs,
9728                                       struct extent_buffer, recow);
9729                 list_del_init(&eb->recow);
9730                 ret = recow_extent_buffer(root, eb);
9731                 if (ret)
9732                         break;
9733         }
9734
9735         while (!list_empty(&delete_items)) {
9736                 struct bad_item *bad;
9737
9738                 bad = list_first_entry(&delete_items, struct bad_item, list);
9739                 list_del_init(&bad->list);
9740                 if (repair)
9741                         ret = delete_bad_item(root, bad);
9742                 free(bad);
9743         }
9744
9745         if (info->quota_enabled) {
9746                 int err;
9747                 fprintf(stderr, "checking quota groups\n");
9748                 err = qgroup_verify_all(info);
9749                 if (err)
9750                         goto out;
9751         }
9752
9753         if (!list_empty(&root->fs_info->recow_ebs)) {
9754                 fprintf(stderr, "Transid errors in file system\n");
9755                 ret = 1;
9756         }
9757 out:
9758         print_qgroup_report(0);
9759         if (found_old_backref) { /*
9760                  * There was a disk format change while mixed
9761                  * backrefs were in the testing tree. The old
9762                  * format existed for about one week.
9763                  */
9764                 printf("\n * Found old mixed backref format. "
9765                        "The old format is not supported! *"
9766                        "\n * Please mount the FS in readonly mode, "
9767                        "backup data and re-format the FS. *\n\n");
9768                 ret = 1;
9769         }
9770         printf("found %llu bytes used err is %d\n",
9771                (unsigned long long)bytes_used, ret);
9772         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9773         printf("total tree bytes: %llu\n",
9774                (unsigned long long)total_btree_bytes);
9775         printf("total fs tree bytes: %llu\n",
9776                (unsigned long long)total_fs_tree_bytes);
9777         printf("total extent tree bytes: %llu\n",
9778                (unsigned long long)total_extent_tree_bytes);
9779         printf("btree space waste bytes: %llu\n",
9780                (unsigned long long)btree_space_waste);
9781         printf("file data blocks allocated: %llu\n referenced %llu\n",
9782                 (unsigned long long)data_bytes_allocated,
9783                 (unsigned long long)data_bytes_referenced);
9784
9785         free_root_recs_tree(&root_cache);
9786 close_out:
9787         close_ctree(root);
9788 err_out:
9789         if (ctx.progress_enabled)
9790                 task_deinit(ctx.info);
9791
9792         return ret;
9793 }