btrfs-progs: return errors from clone_inode_rec, fail in callers
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "btrfsck.h"
39 #include "qgroup-verify.h"
40 #include "rbtree-utils.h"
41 #include "backref.h"
42 #include "ulist.h"
43
44 enum task_position {
45         TASK_EXTENTS,
46         TASK_FREE_SPACE,
47         TASK_FS_ROOTS,
48         TASK_NOTHING, /* have to be the last element */
49 };
50
51 struct task_ctx {
52         int progress_enabled;
53         enum task_position tp;
54
55         struct task_info *info;
56 };
57
58 static u64 bytes_used = 0;
59 static u64 total_csum_bytes = 0;
60 static u64 total_btree_bytes = 0;
61 static u64 total_fs_tree_bytes = 0;
62 static u64 total_extent_tree_bytes = 0;
63 static u64 btree_space_waste = 0;
64 static u64 data_bytes_allocated = 0;
65 static u64 data_bytes_referenced = 0;
66 static int found_old_backref = 0;
67 static LIST_HEAD(duplicate_extents);
68 static LIST_HEAD(delete_items);
69 static int repair = 0;
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75
76 static void *print_status_check(void *p)
77 {
78         struct task_ctx *priv = p;
79         const char work_indicator[] = { '.', 'o', 'O', 'o' };
80         uint32_t count = 0;
81         static char *task_position_string[] = {
82                 "checking extents",
83                 "checking free space cache",
84                 "checking fs roots",
85         };
86
87         task_period_start(priv->info, 1000 /* 1s */);
88
89         if (priv->tp == TASK_NOTHING)
90                 return NULL;
91
92         while (1) {
93                 printf("%s [%c]\r", task_position_string[priv->tp],
94                                 work_indicator[count % 4]);
95                 count++;
96                 fflush(stdout);
97                 task_period_wait(priv->info);
98         }
99         return NULL;
100 }
101
102 static int print_status_return(void *p)
103 {
104         printf("\n");
105         fflush(stdout);
106
107         return 0;
108 }
109
110 struct extent_backref {
111         struct list_head list;
112         unsigned int is_data:1;
113         unsigned int found_extent_tree:1;
114         unsigned int full_backref:1;
115         unsigned int found_ref:1;
116         unsigned int broken:1;
117 };
118
119 struct data_backref {
120         struct extent_backref node;
121         union {
122                 u64 parent;
123                 u64 root;
124         };
125         u64 owner;
126         u64 offset;
127         u64 disk_bytenr;
128         u64 bytes;
129         u64 ram_bytes;
130         u32 num_refs;
131         u32 found_ref;
132 };
133
134 /*
135  * Much like data_backref, just removed the undetermined members
136  * and change it to use list_head.
137  * During extent scan, it is stored in root->orphan_data_extent.
138  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
139  */
140 struct orphan_data_extent {
141         struct list_head list;
142         u64 root;
143         u64 objectid;
144         u64 offset;
145         u64 disk_bytenr;
146         u64 disk_len;
147 };
148
149 struct tree_backref {
150         struct extent_backref node;
151         union {
152                 u64 parent;
153                 u64 root;
154         };
155 };
156
157 struct extent_record {
158         struct list_head backrefs;
159         struct list_head dups;
160         struct list_head list;
161         struct cache_extent cache;
162         struct btrfs_disk_key parent_key;
163         u64 start;
164         u64 max_size;
165         u64 nr;
166         u64 refs;
167         u64 extent_item_refs;
168         u64 generation;
169         u64 parent_generation;
170         u64 info_objectid;
171         u32 num_duplicates;
172         u8 info_level;
173         int flag_block_full_backref;
174         unsigned int found_rec:1;
175         unsigned int content_checked:1;
176         unsigned int owner_ref_checked:1;
177         unsigned int is_root:1;
178         unsigned int metadata:1;
179         unsigned int bad_full_backref:1;
180         unsigned int crossing_stripes:1;
181         unsigned int wrong_chunk_type:1;
182 };
183
184 struct inode_backref {
185         struct list_head list;
186         unsigned int found_dir_item:1;
187         unsigned int found_dir_index:1;
188         unsigned int found_inode_ref:1;
189         unsigned int filetype:8;
190         int errors;
191         unsigned int ref_type;
192         u64 dir;
193         u64 index;
194         u16 namelen;
195         char name[0];
196 };
197
198 struct root_item_record {
199         struct list_head list;
200         u64 objectid;
201         u64 bytenr;
202         u64 last_snapshot;
203         u8 level;
204         u8 drop_level;
205         int level_size;
206         struct btrfs_key drop_key;
207 };
208
209 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
210 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
211 #define REF_ERR_NO_INODE_REF            (1 << 2)
212 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
213 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
214 #define REF_ERR_DUP_INODE_REF           (1 << 5)
215 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
216 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
217 #define REF_ERR_NAME_TOO_LONG           (1 << 8) // 100
218 #define REF_ERR_NO_ROOT_REF             (1 << 9)
219 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
220 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
221 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
222
223 struct file_extent_hole {
224         struct rb_node node;
225         u64 start;
226         u64 len;
227 };
228
229 /* Compatible function to allow reuse of old codes */
230 static u64 first_extent_gap(struct rb_root *holes)
231 {
232         struct file_extent_hole *hole;
233
234         if (RB_EMPTY_ROOT(holes))
235                 return (u64)-1;
236
237         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
238         return hole->start;
239 }
240
241 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
242 {
243         struct file_extent_hole *hole1;
244         struct file_extent_hole *hole2;
245
246         hole1 = rb_entry(node1, struct file_extent_hole, node);
247         hole2 = rb_entry(node2, struct file_extent_hole, node);
248
249         if (hole1->start > hole2->start)
250                 return -1;
251         if (hole1->start < hole2->start)
252                 return 1;
253         /* Now hole1->start == hole2->start */
254         if (hole1->len >= hole2->len)
255                 /*
256                  * Hole 1 will be merge center
257                  * Same hole will be merged later
258                  */
259                 return -1;
260         /* Hole 2 will be merge center */
261         return 1;
262 }
263
264 /*
265  * Add a hole to the record
266  *
267  * This will do hole merge for copy_file_extent_holes(),
268  * which will ensure there won't be continuous holes.
269  */
270 static int add_file_extent_hole(struct rb_root *holes,
271                                 u64 start, u64 len)
272 {
273         struct file_extent_hole *hole;
274         struct file_extent_hole *prev = NULL;
275         struct file_extent_hole *next = NULL;
276
277         hole = malloc(sizeof(*hole));
278         if (!hole)
279                 return -ENOMEM;
280         hole->start = start;
281         hole->len = len;
282         /* Since compare will not return 0, no -EEXIST will happen */
283         rb_insert(holes, &hole->node, compare_hole);
284
285         /* simple merge with previous hole */
286         if (rb_prev(&hole->node))
287                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
288                                 node);
289         if (prev && prev->start + prev->len >= hole->start) {
290                 hole->len = hole->start + hole->len - prev->start;
291                 hole->start = prev->start;
292                 rb_erase(&prev->node, holes);
293                 free(prev);
294                 prev = NULL;
295         }
296
297         /* iterate merge with next holes */
298         while (1) {
299                 if (!rb_next(&hole->node))
300                         break;
301                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
302                                         node);
303                 if (hole->start + hole->len >= next->start) {
304                         if (hole->start + hole->len <= next->start + next->len)
305                                 hole->len = next->start + next->len -
306                                             hole->start;
307                         rb_erase(&next->node, holes);
308                         free(next);
309                         next = NULL;
310                 } else
311                         break;
312         }
313         return 0;
314 }
315
316 static int compare_hole_range(struct rb_node *node, void *data)
317 {
318         struct file_extent_hole *hole;
319         u64 start;
320
321         hole = (struct file_extent_hole *)data;
322         start = hole->start;
323
324         hole = rb_entry(node, struct file_extent_hole, node);
325         if (start < hole->start)
326                 return -1;
327         if (start >= hole->start && start < hole->start + hole->len)
328                 return 0;
329         return 1;
330 }
331
332 /*
333  * Delete a hole in the record
334  *
335  * This will do the hole split and is much restrict than add.
336  */
337 static int del_file_extent_hole(struct rb_root *holes,
338                                 u64 start, u64 len)
339 {
340         struct file_extent_hole *hole;
341         struct file_extent_hole tmp;
342         u64 prev_start = 0;
343         u64 prev_len = 0;
344         u64 next_start = 0;
345         u64 next_len = 0;
346         struct rb_node *node;
347         int have_prev = 0;
348         int have_next = 0;
349         int ret = 0;
350
351         tmp.start = start;
352         tmp.len = len;
353         node = rb_search(holes, &tmp, compare_hole_range, NULL);
354         if (!node)
355                 return -EEXIST;
356         hole = rb_entry(node, struct file_extent_hole, node);
357         if (start + len > hole->start + hole->len)
358                 return -EEXIST;
359
360         /*
361          * Now there will be no overflap, delete the hole and re-add the
362          * split(s) if they exists.
363          */
364         if (start > hole->start) {
365                 prev_start = hole->start;
366                 prev_len = start - hole->start;
367                 have_prev = 1;
368         }
369         if (hole->start + hole->len > start + len) {
370                 next_start = start + len;
371                 next_len = hole->start + hole->len - start - len;
372                 have_next = 1;
373         }
374         rb_erase(node, holes);
375         free(hole);
376         if (have_prev) {
377                 ret = add_file_extent_hole(holes, prev_start, prev_len);
378                 if (ret < 0)
379                         return ret;
380         }
381         if (have_next) {
382                 ret = add_file_extent_hole(holes, next_start, next_len);
383                 if (ret < 0)
384                         return ret;
385         }
386         return 0;
387 }
388
389 static int copy_file_extent_holes(struct rb_root *dst,
390                                   struct rb_root *src)
391 {
392         struct file_extent_hole *hole;
393         struct rb_node *node;
394         int ret = 0;
395
396         node = rb_first(src);
397         while (node) {
398                 hole = rb_entry(node, struct file_extent_hole, node);
399                 ret = add_file_extent_hole(dst, hole->start, hole->len);
400                 if (ret)
401                         break;
402                 node = rb_next(node);
403         }
404         return ret;
405 }
406
407 static void free_file_extent_holes(struct rb_root *holes)
408 {
409         struct rb_node *node;
410         struct file_extent_hole *hole;
411
412         node = rb_first(holes);
413         while (node) {
414                 hole = rb_entry(node, struct file_extent_hole, node);
415                 rb_erase(node, holes);
416                 free(hole);
417                 node = rb_first(holes);
418         }
419 }
420
421 struct inode_record {
422         struct list_head backrefs;
423         unsigned int checked:1;
424         unsigned int merging:1;
425         unsigned int found_inode_item:1;
426         unsigned int found_dir_item:1;
427         unsigned int found_file_extent:1;
428         unsigned int found_csum_item:1;
429         unsigned int some_csum_missing:1;
430         unsigned int nodatasum:1;
431         int errors;
432
433         u64 ino;
434         u32 nlink;
435         u32 imode;
436         u64 isize;
437         u64 nbytes;
438
439         u32 found_link;
440         u64 found_size;
441         u64 extent_start;
442         u64 extent_end;
443         struct rb_root holes;
444         struct list_head orphan_extents;
445
446         u32 refs;
447 };
448
449 #define I_ERR_NO_INODE_ITEM             (1 << 0)
450 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
451 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
452 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
453 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
454 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
455 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
456 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
457 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) // 100
458 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
459 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) // 400
460 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
461 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
462 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
463 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
464
465 struct root_backref {
466         struct list_head list;
467         unsigned int found_dir_item:1;
468         unsigned int found_dir_index:1;
469         unsigned int found_back_ref:1;
470         unsigned int found_forward_ref:1;
471         unsigned int reachable:1;
472         int errors;
473         u64 ref_root;
474         u64 dir;
475         u64 index;
476         u16 namelen;
477         char name[0];
478 };
479
480 struct root_record {
481         struct list_head backrefs;
482         struct cache_extent cache;
483         unsigned int found_root_item:1;
484         u64 objectid;
485         u32 found_ref;
486 };
487
488 struct ptr_node {
489         struct cache_extent cache;
490         void *data;
491 };
492
493 struct shared_node {
494         struct cache_extent cache;
495         struct cache_tree root_cache;
496         struct cache_tree inode_cache;
497         struct inode_record *current;
498         u32 refs;
499 };
500
501 struct block_info {
502         u64 start;
503         u32 size;
504 };
505
506 struct walk_control {
507         struct cache_tree shared;
508         struct shared_node *nodes[BTRFS_MAX_LEVEL];
509         int active_node;
510         int root_level;
511 };
512
513 struct bad_item {
514         struct btrfs_key key;
515         u64 root_id;
516         struct list_head list;
517 };
518
519 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
520
521 static void record_root_in_trans(struct btrfs_trans_handle *trans,
522                                  struct btrfs_root *root)
523 {
524         if (root->last_trans != trans->transid) {
525                 root->track_dirty = 1;
526                 root->last_trans = trans->transid;
527                 root->commit_root = root->node;
528                 extent_buffer_get(root->node);
529         }
530 }
531
532 static u8 imode_to_type(u32 imode)
533 {
534 #define S_SHIFT 12
535         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
536                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
537                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
538                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
539                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
540                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
541                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
542                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
543         };
544
545         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
546 #undef S_SHIFT
547 }
548
549 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
550 {
551         struct device_record *rec1;
552         struct device_record *rec2;
553
554         rec1 = rb_entry(node1, struct device_record, node);
555         rec2 = rb_entry(node2, struct device_record, node);
556         if (rec1->devid > rec2->devid)
557                 return -1;
558         else if (rec1->devid < rec2->devid)
559                 return 1;
560         else
561                 return 0;
562 }
563
564 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
565 {
566         struct inode_record *rec;
567         struct inode_backref *backref;
568         struct inode_backref *orig;
569         struct inode_backref *tmp;
570         struct orphan_data_extent *src_orphan;
571         struct orphan_data_extent *dst_orphan;
572         size_t size;
573         int ret;
574
575         rec = malloc(sizeof(*rec));
576         if (!rec)
577                 return ERR_PTR(-ENOMEM);
578         memcpy(rec, orig_rec, sizeof(*rec));
579         rec->refs = 1;
580         INIT_LIST_HEAD(&rec->backrefs);
581         INIT_LIST_HEAD(&rec->orphan_extents);
582         rec->holes = RB_ROOT;
583
584         list_for_each_entry(orig, &orig_rec->backrefs, list) {
585                 size = sizeof(*orig) + orig->namelen + 1;
586                 backref = malloc(size);
587                 if (!backref) {
588                         ret = -ENOMEM;
589                         goto cleanup;
590                 }
591                 memcpy(backref, orig, size);
592                 list_add_tail(&backref->list, &rec->backrefs);
593         }
594         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
595                 dst_orphan = malloc(sizeof(*dst_orphan));
596                 if (!dst_orphan) {
597                         ret = -ENOMEM;
598                         goto cleanup;
599                 }
600                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
601                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
602         }
603         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
604         BUG_ON(ret < 0);
605
606         return rec;
607
608 cleanup:
609         if (!list_empty(&rec->backrefs))
610                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
611                         list_del(&orig->list);
612                         free(orig);
613                 }
614
615         if (!list_empty(&rec->orphan_extents))
616                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
617                         list_del(&orig->list);
618                         free(orig);
619                 }
620
621         free(rec);
622
623         return ERR_PTR(ret);
624 }
625
626 static void print_orphan_data_extents(struct list_head *orphan_extents,
627                                       u64 objectid)
628 {
629         struct orphan_data_extent *orphan;
630
631         if (list_empty(orphan_extents))
632                 return;
633         printf("The following data extent is lost in tree %llu:\n",
634                objectid);
635         list_for_each_entry(orphan, orphan_extents, list) {
636                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
637                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
638                        orphan->disk_len);
639         }
640 }
641
642 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
643 {
644         u64 root_objectid = root->root_key.objectid;
645         int errors = rec->errors;
646
647         if (!errors)
648                 return;
649         /* reloc root errors, we print its corresponding fs root objectid*/
650         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
651                 root_objectid = root->root_key.offset;
652                 fprintf(stderr, "reloc");
653         }
654         fprintf(stderr, "root %llu inode %llu errors %x",
655                 (unsigned long long) root_objectid,
656                 (unsigned long long) rec->ino, rec->errors);
657
658         if (errors & I_ERR_NO_INODE_ITEM)
659                 fprintf(stderr, ", no inode item");
660         if (errors & I_ERR_NO_ORPHAN_ITEM)
661                 fprintf(stderr, ", no orphan item");
662         if (errors & I_ERR_DUP_INODE_ITEM)
663                 fprintf(stderr, ", dup inode item");
664         if (errors & I_ERR_DUP_DIR_INDEX)
665                 fprintf(stderr, ", dup dir index");
666         if (errors & I_ERR_ODD_DIR_ITEM)
667                 fprintf(stderr, ", odd dir item");
668         if (errors & I_ERR_ODD_FILE_EXTENT)
669                 fprintf(stderr, ", odd file extent");
670         if (errors & I_ERR_BAD_FILE_EXTENT)
671                 fprintf(stderr, ", bad file extent");
672         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
673                 fprintf(stderr, ", file extent overlap");
674         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
675                 fprintf(stderr, ", file extent discount");
676         if (errors & I_ERR_DIR_ISIZE_WRONG)
677                 fprintf(stderr, ", dir isize wrong");
678         if (errors & I_ERR_FILE_NBYTES_WRONG)
679                 fprintf(stderr, ", nbytes wrong");
680         if (errors & I_ERR_ODD_CSUM_ITEM)
681                 fprintf(stderr, ", odd csum item");
682         if (errors & I_ERR_SOME_CSUM_MISSING)
683                 fprintf(stderr, ", some csum missing");
684         if (errors & I_ERR_LINK_COUNT_WRONG)
685                 fprintf(stderr, ", link count wrong");
686         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
687                 fprintf(stderr, ", orphan file extent");
688         fprintf(stderr, "\n");
689         /* Print the orphan extents if needed */
690         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
691                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
692
693         /* Print the holes if needed */
694         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
695                 struct file_extent_hole *hole;
696                 struct rb_node *node;
697                 int found = 0;
698
699                 node = rb_first(&rec->holes);
700                 fprintf(stderr, "Found file extent holes:\n");
701                 while (node) {
702                         found = 1;
703                         hole = rb_entry(node, struct file_extent_hole, node);
704                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
705                                 hole->start, hole->len);
706                         node = rb_next(node);
707                 }
708                 if (!found)
709                         fprintf(stderr, "\tstart: 0, len: %llu\n",
710                                 round_up(rec->isize, root->sectorsize));
711         }
712 }
713
714 static void print_ref_error(int errors)
715 {
716         if (errors & REF_ERR_NO_DIR_ITEM)
717                 fprintf(stderr, ", no dir item");
718         if (errors & REF_ERR_NO_DIR_INDEX)
719                 fprintf(stderr, ", no dir index");
720         if (errors & REF_ERR_NO_INODE_REF)
721                 fprintf(stderr, ", no inode ref");
722         if (errors & REF_ERR_DUP_DIR_ITEM)
723                 fprintf(stderr, ", dup dir item");
724         if (errors & REF_ERR_DUP_DIR_INDEX)
725                 fprintf(stderr, ", dup dir index");
726         if (errors & REF_ERR_DUP_INODE_REF)
727                 fprintf(stderr, ", dup inode ref");
728         if (errors & REF_ERR_INDEX_UNMATCH)
729                 fprintf(stderr, ", index unmatch");
730         if (errors & REF_ERR_FILETYPE_UNMATCH)
731                 fprintf(stderr, ", filetype unmatch");
732         if (errors & REF_ERR_NAME_TOO_LONG)
733                 fprintf(stderr, ", name too long");
734         if (errors & REF_ERR_NO_ROOT_REF)
735                 fprintf(stderr, ", no root ref");
736         if (errors & REF_ERR_NO_ROOT_BACKREF)
737                 fprintf(stderr, ", no root backref");
738         if (errors & REF_ERR_DUP_ROOT_REF)
739                 fprintf(stderr, ", dup root ref");
740         if (errors & REF_ERR_DUP_ROOT_BACKREF)
741                 fprintf(stderr, ", dup root backref");
742         fprintf(stderr, "\n");
743 }
744
745 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
746                                           u64 ino, int mod)
747 {
748         struct ptr_node *node;
749         struct cache_extent *cache;
750         struct inode_record *rec = NULL;
751         int ret;
752
753         cache = lookup_cache_extent(inode_cache, ino, 1);
754         if (cache) {
755                 node = container_of(cache, struct ptr_node, cache);
756                 rec = node->data;
757                 if (mod && rec->refs > 1) {
758                         node->data = clone_inode_rec(rec);
759                         if (IS_ERR(node->data))
760                                 return node->data;
761                         rec->refs--;
762                         rec = node->data;
763                 }
764         } else if (mod) {
765                 rec = calloc(1, sizeof(*rec));
766                 if (!rec)
767                         return ERR_PTR(-ENOMEM);
768                 rec->ino = ino;
769                 rec->extent_start = (u64)-1;
770                 rec->refs = 1;
771                 INIT_LIST_HEAD(&rec->backrefs);
772                 INIT_LIST_HEAD(&rec->orphan_extents);
773                 rec->holes = RB_ROOT;
774
775                 node = malloc(sizeof(*node));
776                 if (!node) {
777                         free(rec);
778                         return ERR_PTR(-ENOMEM);
779                 }
780                 node->cache.start = ino;
781                 node->cache.size = 1;
782                 node->data = rec;
783
784                 if (ino == BTRFS_FREE_INO_OBJECTID)
785                         rec->found_link = 1;
786
787                 ret = insert_cache_extent(inode_cache, &node->cache);
788                 if (ret)
789                         return ERR_PTR(-EEXIST);
790         }
791         return rec;
792 }
793
794 static void free_orphan_data_extents(struct list_head *orphan_extents)
795 {
796         struct orphan_data_extent *orphan;
797
798         while (!list_empty(orphan_extents)) {
799                 orphan = list_entry(orphan_extents->next,
800                                     struct orphan_data_extent, list);
801                 list_del(&orphan->list);
802                 free(orphan);
803         }
804 }
805
806 static void free_inode_rec(struct inode_record *rec)
807 {
808         struct inode_backref *backref;
809
810         if (--rec->refs > 0)
811                 return;
812
813         while (!list_empty(&rec->backrefs)) {
814                 backref = list_entry(rec->backrefs.next,
815                                      struct inode_backref, list);
816                 list_del(&backref->list);
817                 free(backref);
818         }
819         free_orphan_data_extents(&rec->orphan_extents);
820         free_file_extent_holes(&rec->holes);
821         free(rec);
822 }
823
824 static int can_free_inode_rec(struct inode_record *rec)
825 {
826         if (!rec->errors && rec->checked && rec->found_inode_item &&
827             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
828                 return 1;
829         return 0;
830 }
831
832 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
833                                  struct inode_record *rec)
834 {
835         struct cache_extent *cache;
836         struct inode_backref *tmp, *backref;
837         struct ptr_node *node;
838         unsigned char filetype;
839
840         if (!rec->found_inode_item)
841                 return;
842
843         filetype = imode_to_type(rec->imode);
844         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
845                 if (backref->found_dir_item && backref->found_dir_index) {
846                         if (backref->filetype != filetype)
847                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
848                         if (!backref->errors && backref->found_inode_ref &&
849                             rec->nlink == rec->found_link) {
850                                 list_del(&backref->list);
851                                 free(backref);
852                         }
853                 }
854         }
855
856         if (!rec->checked || rec->merging)
857                 return;
858
859         if (S_ISDIR(rec->imode)) {
860                 if (rec->found_size != rec->isize)
861                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
862                 if (rec->found_file_extent)
863                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
864         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
865                 if (rec->found_dir_item)
866                         rec->errors |= I_ERR_ODD_DIR_ITEM;
867                 if (rec->found_size != rec->nbytes)
868                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
869                 if (rec->nlink > 0 && !no_holes &&
870                     (rec->extent_end < rec->isize ||
871                      first_extent_gap(&rec->holes) < rec->isize))
872                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
873         }
874
875         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
876                 if (rec->found_csum_item && rec->nodatasum)
877                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
878                 if (rec->some_csum_missing && !rec->nodatasum)
879                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
880         }
881
882         BUG_ON(rec->refs != 1);
883         if (can_free_inode_rec(rec)) {
884                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
885                 node = container_of(cache, struct ptr_node, cache);
886                 BUG_ON(node->data != rec);
887                 remove_cache_extent(inode_cache, &node->cache);
888                 free(node);
889                 free_inode_rec(rec);
890         }
891 }
892
893 static int check_orphan_item(struct btrfs_root *root, u64 ino)
894 {
895         struct btrfs_path path;
896         struct btrfs_key key;
897         int ret;
898
899         key.objectid = BTRFS_ORPHAN_OBJECTID;
900         key.type = BTRFS_ORPHAN_ITEM_KEY;
901         key.offset = ino;
902
903         btrfs_init_path(&path);
904         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
905         btrfs_release_path(&path);
906         if (ret > 0)
907                 ret = -ENOENT;
908         return ret;
909 }
910
911 static int process_inode_item(struct extent_buffer *eb,
912                               int slot, struct btrfs_key *key,
913                               struct shared_node *active_node)
914 {
915         struct inode_record *rec;
916         struct btrfs_inode_item *item;
917
918         rec = active_node->current;
919         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
920         if (rec->found_inode_item) {
921                 rec->errors |= I_ERR_DUP_INODE_ITEM;
922                 return 1;
923         }
924         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
925         rec->nlink = btrfs_inode_nlink(eb, item);
926         rec->isize = btrfs_inode_size(eb, item);
927         rec->nbytes = btrfs_inode_nbytes(eb, item);
928         rec->imode = btrfs_inode_mode(eb, item);
929         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
930                 rec->nodatasum = 1;
931         rec->found_inode_item = 1;
932         if (rec->nlink == 0)
933                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
934         maybe_free_inode_rec(&active_node->inode_cache, rec);
935         return 0;
936 }
937
938 static struct inode_backref *get_inode_backref(struct inode_record *rec,
939                                                 const char *name,
940                                                 int namelen, u64 dir)
941 {
942         struct inode_backref *backref;
943
944         list_for_each_entry(backref, &rec->backrefs, list) {
945                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
946                         break;
947                 if (backref->dir != dir || backref->namelen != namelen)
948                         continue;
949                 if (memcmp(name, backref->name, namelen))
950                         continue;
951                 return backref;
952         }
953
954         backref = malloc(sizeof(*backref) + namelen + 1);
955         memset(backref, 0, sizeof(*backref));
956         backref->dir = dir;
957         backref->namelen = namelen;
958         memcpy(backref->name, name, namelen);
959         backref->name[namelen] = '\0';
960         list_add_tail(&backref->list, &rec->backrefs);
961         return backref;
962 }
963
964 static int add_inode_backref(struct cache_tree *inode_cache,
965                              u64 ino, u64 dir, u64 index,
966                              const char *name, int namelen,
967                              int filetype, int itemtype, int errors)
968 {
969         struct inode_record *rec;
970         struct inode_backref *backref;
971
972         rec = get_inode_rec(inode_cache, ino, 1);
973         BUG_ON(IS_ERR(rec));
974         backref = get_inode_backref(rec, name, namelen, dir);
975         if (errors)
976                 backref->errors |= errors;
977         if (itemtype == BTRFS_DIR_INDEX_KEY) {
978                 if (backref->found_dir_index)
979                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
980                 if (backref->found_inode_ref && backref->index != index)
981                         backref->errors |= REF_ERR_INDEX_UNMATCH;
982                 if (backref->found_dir_item && backref->filetype != filetype)
983                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
984
985                 backref->index = index;
986                 backref->filetype = filetype;
987                 backref->found_dir_index = 1;
988         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
989                 rec->found_link++;
990                 if (backref->found_dir_item)
991                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
992                 if (backref->found_dir_index && backref->filetype != filetype)
993                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
994
995                 backref->filetype = filetype;
996                 backref->found_dir_item = 1;
997         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
998                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
999                 if (backref->found_inode_ref)
1000                         backref->errors |= REF_ERR_DUP_INODE_REF;
1001                 if (backref->found_dir_index && backref->index != index)
1002                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1003                 else
1004                         backref->index = index;
1005
1006                 backref->ref_type = itemtype;
1007                 backref->found_inode_ref = 1;
1008         } else {
1009                 BUG_ON(1);
1010         }
1011
1012         maybe_free_inode_rec(inode_cache, rec);
1013         return 0;
1014 }
1015
1016 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1017                             struct cache_tree *dst_cache)
1018 {
1019         struct inode_backref *backref;
1020         u32 dir_count = 0;
1021         int ret = 0;
1022
1023         dst->merging = 1;
1024         list_for_each_entry(backref, &src->backrefs, list) {
1025                 if (backref->found_dir_index) {
1026                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1027                                         backref->index, backref->name,
1028                                         backref->namelen, backref->filetype,
1029                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1030                 }
1031                 if (backref->found_dir_item) {
1032                         dir_count++;
1033                         add_inode_backref(dst_cache, dst->ino,
1034                                         backref->dir, 0, backref->name,
1035                                         backref->namelen, backref->filetype,
1036                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1037                 }
1038                 if (backref->found_inode_ref) {
1039                         add_inode_backref(dst_cache, dst->ino,
1040                                         backref->dir, backref->index,
1041                                         backref->name, backref->namelen, 0,
1042                                         backref->ref_type, backref->errors);
1043                 }
1044         }
1045
1046         if (src->found_dir_item)
1047                 dst->found_dir_item = 1;
1048         if (src->found_file_extent)
1049                 dst->found_file_extent = 1;
1050         if (src->found_csum_item)
1051                 dst->found_csum_item = 1;
1052         if (src->some_csum_missing)
1053                 dst->some_csum_missing = 1;
1054         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1055                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1056                 if (ret < 0)
1057                         return ret;
1058         }
1059
1060         BUG_ON(src->found_link < dir_count);
1061         dst->found_link += src->found_link - dir_count;
1062         dst->found_size += src->found_size;
1063         if (src->extent_start != (u64)-1) {
1064                 if (dst->extent_start == (u64)-1) {
1065                         dst->extent_start = src->extent_start;
1066                         dst->extent_end = src->extent_end;
1067                 } else {
1068                         if (dst->extent_end > src->extent_start)
1069                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1070                         else if (dst->extent_end < src->extent_start) {
1071                                 ret = add_file_extent_hole(&dst->holes,
1072                                         dst->extent_end,
1073                                         src->extent_start - dst->extent_end);
1074                         }
1075                         if (dst->extent_end < src->extent_end)
1076                                 dst->extent_end = src->extent_end;
1077                 }
1078         }
1079
1080         dst->errors |= src->errors;
1081         if (src->found_inode_item) {
1082                 if (!dst->found_inode_item) {
1083                         dst->nlink = src->nlink;
1084                         dst->isize = src->isize;
1085                         dst->nbytes = src->nbytes;
1086                         dst->imode = src->imode;
1087                         dst->nodatasum = src->nodatasum;
1088                         dst->found_inode_item = 1;
1089                 } else {
1090                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1091                 }
1092         }
1093         dst->merging = 0;
1094
1095         return 0;
1096 }
1097
1098 static int splice_shared_node(struct shared_node *src_node,
1099                               struct shared_node *dst_node)
1100 {
1101         struct cache_extent *cache;
1102         struct ptr_node *node, *ins;
1103         struct cache_tree *src, *dst;
1104         struct inode_record *rec, *conflict;
1105         u64 current_ino = 0;
1106         int splice = 0;
1107         int ret;
1108
1109         if (--src_node->refs == 0)
1110                 splice = 1;
1111         if (src_node->current)
1112                 current_ino = src_node->current->ino;
1113
1114         src = &src_node->root_cache;
1115         dst = &dst_node->root_cache;
1116 again:
1117         cache = search_cache_extent(src, 0);
1118         while (cache) {
1119                 node = container_of(cache, struct ptr_node, cache);
1120                 rec = node->data;
1121                 cache = next_cache_extent(cache);
1122
1123                 if (splice) {
1124                         remove_cache_extent(src, &node->cache);
1125                         ins = node;
1126                 } else {
1127                         ins = malloc(sizeof(*ins));
1128                         ins->cache.start = node->cache.start;
1129                         ins->cache.size = node->cache.size;
1130                         ins->data = rec;
1131                         rec->refs++;
1132                 }
1133                 ret = insert_cache_extent(dst, &ins->cache);
1134                 if (ret == -EEXIST) {
1135                         conflict = get_inode_rec(dst, rec->ino, 1);
1136                         BUG_ON(IS_ERR(conflict));
1137                         merge_inode_recs(rec, conflict, dst);
1138                         if (rec->checked) {
1139                                 conflict->checked = 1;
1140                                 if (dst_node->current == conflict)
1141                                         dst_node->current = NULL;
1142                         }
1143                         maybe_free_inode_rec(dst, conflict);
1144                         free_inode_rec(rec);
1145                         free(ins);
1146                 } else {
1147                         BUG_ON(ret);
1148                 }
1149         }
1150
1151         if (src == &src_node->root_cache) {
1152                 src = &src_node->inode_cache;
1153                 dst = &dst_node->inode_cache;
1154                 goto again;
1155         }
1156
1157         if (current_ino > 0 && (!dst_node->current ||
1158             current_ino > dst_node->current->ino)) {
1159                 if (dst_node->current) {
1160                         dst_node->current->checked = 1;
1161                         maybe_free_inode_rec(dst, dst_node->current);
1162                 }
1163                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1164                 BUG_ON(IS_ERR(dst_node->current));
1165         }
1166         return 0;
1167 }
1168
1169 static void free_inode_ptr(struct cache_extent *cache)
1170 {
1171         struct ptr_node *node;
1172         struct inode_record *rec;
1173
1174         node = container_of(cache, struct ptr_node, cache);
1175         rec = node->data;
1176         free_inode_rec(rec);
1177         free(node);
1178 }
1179
1180 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1181
1182 static struct shared_node *find_shared_node(struct cache_tree *shared,
1183                                             u64 bytenr)
1184 {
1185         struct cache_extent *cache;
1186         struct shared_node *node;
1187
1188         cache = lookup_cache_extent(shared, bytenr, 1);
1189         if (cache) {
1190                 node = container_of(cache, struct shared_node, cache);
1191                 return node;
1192         }
1193         return NULL;
1194 }
1195
1196 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1197 {
1198         int ret;
1199         struct shared_node *node;
1200
1201         node = calloc(1, sizeof(*node));
1202         node->cache.start = bytenr;
1203         node->cache.size = 1;
1204         cache_tree_init(&node->root_cache);
1205         cache_tree_init(&node->inode_cache);
1206         node->refs = refs;
1207
1208         ret = insert_cache_extent(shared, &node->cache);
1209         BUG_ON(ret);
1210         return 0;
1211 }
1212
1213 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1214                              struct walk_control *wc, int level)
1215 {
1216         struct shared_node *node;
1217         struct shared_node *dest;
1218
1219         if (level == wc->active_node)
1220                 return 0;
1221
1222         BUG_ON(wc->active_node <= level);
1223         node = find_shared_node(&wc->shared, bytenr);
1224         if (!node) {
1225                 add_shared_node(&wc->shared, bytenr, refs);
1226                 node = find_shared_node(&wc->shared, bytenr);
1227                 wc->nodes[level] = node;
1228                 wc->active_node = level;
1229                 return 0;
1230         }
1231
1232         if (wc->root_level == wc->active_node &&
1233             btrfs_root_refs(&root->root_item) == 0) {
1234                 if (--node->refs == 0) {
1235                         free_inode_recs_tree(&node->root_cache);
1236                         free_inode_recs_tree(&node->inode_cache);
1237                         remove_cache_extent(&wc->shared, &node->cache);
1238                         free(node);
1239                 }
1240                 return 1;
1241         }
1242
1243         dest = wc->nodes[wc->active_node];
1244         splice_shared_node(node, dest);
1245         if (node->refs == 0) {
1246                 remove_cache_extent(&wc->shared, &node->cache);
1247                 free(node);
1248         }
1249         return 1;
1250 }
1251
1252 static int leave_shared_node(struct btrfs_root *root,
1253                              struct walk_control *wc, int level)
1254 {
1255         struct shared_node *node;
1256         struct shared_node *dest;
1257         int i;
1258
1259         if (level == wc->root_level)
1260                 return 0;
1261
1262         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1263                 if (wc->nodes[i])
1264                         break;
1265         }
1266         BUG_ON(i >= BTRFS_MAX_LEVEL);
1267
1268         node = wc->nodes[wc->active_node];
1269         wc->nodes[wc->active_node] = NULL;
1270         wc->active_node = i;
1271
1272         dest = wc->nodes[wc->active_node];
1273         if (wc->active_node < wc->root_level ||
1274             btrfs_root_refs(&root->root_item) > 0) {
1275                 BUG_ON(node->refs <= 1);
1276                 splice_shared_node(node, dest);
1277         } else {
1278                 BUG_ON(node->refs < 2);
1279                 node->refs--;
1280         }
1281         return 0;
1282 }
1283
1284 /*
1285  * Returns:
1286  * < 0 - on error
1287  * 1   - if the root with id child_root_id is a child of root parent_root_id
1288  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1289  *       has other root(s) as parent(s)
1290  * 2   - if the root child_root_id doesn't have any parent roots
1291  */
1292 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1293                          u64 child_root_id)
1294 {
1295         struct btrfs_path path;
1296         struct btrfs_key key;
1297         struct extent_buffer *leaf;
1298         int has_parent = 0;
1299         int ret;
1300
1301         btrfs_init_path(&path);
1302
1303         key.objectid = parent_root_id;
1304         key.type = BTRFS_ROOT_REF_KEY;
1305         key.offset = child_root_id;
1306         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1307                                 0, 0);
1308         if (ret < 0)
1309                 return ret;
1310         btrfs_release_path(&path);
1311         if (!ret)
1312                 return 1;
1313
1314         key.objectid = child_root_id;
1315         key.type = BTRFS_ROOT_BACKREF_KEY;
1316         key.offset = 0;
1317         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1318                                 0, 0);
1319         if (ret < 0)
1320                 goto out;
1321
1322         while (1) {
1323                 leaf = path.nodes[0];
1324                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1325                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1326                         if (ret)
1327                                 break;
1328                         leaf = path.nodes[0];
1329                 }
1330
1331                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1332                 if (key.objectid != child_root_id ||
1333                     key.type != BTRFS_ROOT_BACKREF_KEY)
1334                         break;
1335
1336                 has_parent = 1;
1337
1338                 if (key.offset == parent_root_id) {
1339                         btrfs_release_path(&path);
1340                         return 1;
1341                 }
1342
1343                 path.slots[0]++;
1344         }
1345 out:
1346         btrfs_release_path(&path);
1347         if (ret < 0)
1348                 return ret;
1349         return has_parent ? 0 : 2;
1350 }
1351
1352 static int process_dir_item(struct btrfs_root *root,
1353                             struct extent_buffer *eb,
1354                             int slot, struct btrfs_key *key,
1355                             struct shared_node *active_node)
1356 {
1357         u32 total;
1358         u32 cur = 0;
1359         u32 len;
1360         u32 name_len;
1361         u32 data_len;
1362         int error;
1363         int nritems = 0;
1364         int filetype;
1365         struct btrfs_dir_item *di;
1366         struct inode_record *rec;
1367         struct cache_tree *root_cache;
1368         struct cache_tree *inode_cache;
1369         struct btrfs_key location;
1370         char namebuf[BTRFS_NAME_LEN];
1371
1372         root_cache = &active_node->root_cache;
1373         inode_cache = &active_node->inode_cache;
1374         rec = active_node->current;
1375         rec->found_dir_item = 1;
1376
1377         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1378         total = btrfs_item_size_nr(eb, slot);
1379         while (cur < total) {
1380                 nritems++;
1381                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1382                 name_len = btrfs_dir_name_len(eb, di);
1383                 data_len = btrfs_dir_data_len(eb, di);
1384                 filetype = btrfs_dir_type(eb, di);
1385
1386                 rec->found_size += name_len;
1387                 if (name_len <= BTRFS_NAME_LEN) {
1388                         len = name_len;
1389                         error = 0;
1390                 } else {
1391                         len = BTRFS_NAME_LEN;
1392                         error = REF_ERR_NAME_TOO_LONG;
1393                 }
1394                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1395
1396                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1397                         add_inode_backref(inode_cache, location.objectid,
1398                                           key->objectid, key->offset, namebuf,
1399                                           len, filetype, key->type, error);
1400                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1401                         add_inode_backref(root_cache, location.objectid,
1402                                           key->objectid, key->offset,
1403                                           namebuf, len, filetype,
1404                                           key->type, error);
1405                 } else {
1406                         fprintf(stderr, "invalid location in dir item %u\n",
1407                                 location.type);
1408                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1409                                           key->objectid, key->offset, namebuf,
1410                                           len, filetype, key->type, error);
1411                 }
1412
1413                 len = sizeof(*di) + name_len + data_len;
1414                 di = (struct btrfs_dir_item *)((char *)di + len);
1415                 cur += len;
1416         }
1417         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1418                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1419
1420         return 0;
1421 }
1422
1423 static int process_inode_ref(struct extent_buffer *eb,
1424                              int slot, struct btrfs_key *key,
1425                              struct shared_node *active_node)
1426 {
1427         u32 total;
1428         u32 cur = 0;
1429         u32 len;
1430         u32 name_len;
1431         u64 index;
1432         int error;
1433         struct cache_tree *inode_cache;
1434         struct btrfs_inode_ref *ref;
1435         char namebuf[BTRFS_NAME_LEN];
1436
1437         inode_cache = &active_node->inode_cache;
1438
1439         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1440         total = btrfs_item_size_nr(eb, slot);
1441         while (cur < total) {
1442                 name_len = btrfs_inode_ref_name_len(eb, ref);
1443                 index = btrfs_inode_ref_index(eb, ref);
1444                 if (name_len <= BTRFS_NAME_LEN) {
1445                         len = name_len;
1446                         error = 0;
1447                 } else {
1448                         len = BTRFS_NAME_LEN;
1449                         error = REF_ERR_NAME_TOO_LONG;
1450                 }
1451                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1452                 add_inode_backref(inode_cache, key->objectid, key->offset,
1453                                   index, namebuf, len, 0, key->type, error);
1454
1455                 len = sizeof(*ref) + name_len;
1456                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1457                 cur += len;
1458         }
1459         return 0;
1460 }
1461
1462 static int process_inode_extref(struct extent_buffer *eb,
1463                                 int slot, struct btrfs_key *key,
1464                                 struct shared_node *active_node)
1465 {
1466         u32 total;
1467         u32 cur = 0;
1468         u32 len;
1469         u32 name_len;
1470         u64 index;
1471         u64 parent;
1472         int error;
1473         struct cache_tree *inode_cache;
1474         struct btrfs_inode_extref *extref;
1475         char namebuf[BTRFS_NAME_LEN];
1476
1477         inode_cache = &active_node->inode_cache;
1478
1479         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1480         total = btrfs_item_size_nr(eb, slot);
1481         while (cur < total) {
1482                 name_len = btrfs_inode_extref_name_len(eb, extref);
1483                 index = btrfs_inode_extref_index(eb, extref);
1484                 parent = btrfs_inode_extref_parent(eb, extref);
1485                 if (name_len <= BTRFS_NAME_LEN) {
1486                         len = name_len;
1487                         error = 0;
1488                 } else {
1489                         len = BTRFS_NAME_LEN;
1490                         error = REF_ERR_NAME_TOO_LONG;
1491                 }
1492                 read_extent_buffer(eb, namebuf,
1493                                    (unsigned long)(extref + 1), len);
1494                 add_inode_backref(inode_cache, key->objectid, parent,
1495                                   index, namebuf, len, 0, key->type, error);
1496
1497                 len = sizeof(*extref) + name_len;
1498                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1499                 cur += len;
1500         }
1501         return 0;
1502
1503 }
1504
1505 static int count_csum_range(struct btrfs_root *root, u64 start,
1506                             u64 len, u64 *found)
1507 {
1508         struct btrfs_key key;
1509         struct btrfs_path path;
1510         struct extent_buffer *leaf;
1511         int ret;
1512         size_t size;
1513         *found = 0;
1514         u64 csum_end;
1515         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1516
1517         btrfs_init_path(&path);
1518
1519         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1520         key.offset = start;
1521         key.type = BTRFS_EXTENT_CSUM_KEY;
1522
1523         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1524                                 &key, &path, 0, 0);
1525         if (ret < 0)
1526                 goto out;
1527         if (ret > 0 && path.slots[0] > 0) {
1528                 leaf = path.nodes[0];
1529                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1530                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1531                     key.type == BTRFS_EXTENT_CSUM_KEY)
1532                         path.slots[0]--;
1533         }
1534
1535         while (len > 0) {
1536                 leaf = path.nodes[0];
1537                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1538                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1539                         if (ret > 0)
1540                                 break;
1541                         else if (ret < 0)
1542                                 goto out;
1543                         leaf = path.nodes[0];
1544                 }
1545
1546                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1547                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1548                     key.type != BTRFS_EXTENT_CSUM_KEY)
1549                         break;
1550
1551                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1552                 if (key.offset >= start + len)
1553                         break;
1554
1555                 if (key.offset > start)
1556                         start = key.offset;
1557
1558                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1559                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1560                 if (csum_end > start) {
1561                         size = min(csum_end - start, len);
1562                         len -= size;
1563                         start += size;
1564                         *found += size;
1565                 }
1566
1567                 path.slots[0]++;
1568         }
1569 out:
1570         btrfs_release_path(&path);
1571         if (ret < 0)
1572                 return ret;
1573         return 0;
1574 }
1575
1576 static int process_file_extent(struct btrfs_root *root,
1577                                 struct extent_buffer *eb,
1578                                 int slot, struct btrfs_key *key,
1579                                 struct shared_node *active_node)
1580 {
1581         struct inode_record *rec;
1582         struct btrfs_file_extent_item *fi;
1583         u64 num_bytes = 0;
1584         u64 disk_bytenr = 0;
1585         u64 extent_offset = 0;
1586         u64 mask = root->sectorsize - 1;
1587         int extent_type;
1588         int ret;
1589
1590         rec = active_node->current;
1591         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1592         rec->found_file_extent = 1;
1593
1594         if (rec->extent_start == (u64)-1) {
1595                 rec->extent_start = key->offset;
1596                 rec->extent_end = key->offset;
1597         }
1598
1599         if (rec->extent_end > key->offset)
1600                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1601         else if (rec->extent_end < key->offset) {
1602                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1603                                            key->offset - rec->extent_end);
1604                 if (ret < 0)
1605                         return ret;
1606         }
1607
1608         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1609         extent_type = btrfs_file_extent_type(eb, fi);
1610
1611         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1612                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1613                 if (num_bytes == 0)
1614                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1615                 rec->found_size += num_bytes;
1616                 num_bytes = (num_bytes + mask) & ~mask;
1617         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1618                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1619                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1620                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1621                 extent_offset = btrfs_file_extent_offset(eb, fi);
1622                 if (num_bytes == 0 || (num_bytes & mask))
1623                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1624                 if (num_bytes + extent_offset >
1625                     btrfs_file_extent_ram_bytes(eb, fi))
1626                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1627                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1628                     (btrfs_file_extent_compression(eb, fi) ||
1629                      btrfs_file_extent_encryption(eb, fi) ||
1630                      btrfs_file_extent_other_encoding(eb, fi)))
1631                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1632                 if (disk_bytenr > 0)
1633                         rec->found_size += num_bytes;
1634         } else {
1635                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1636         }
1637         rec->extent_end = key->offset + num_bytes;
1638
1639         /*
1640          * The data reloc tree will copy full extents into its inode and then
1641          * copy the corresponding csums.  Because the extent it copied could be
1642          * a preallocated extent that hasn't been written to yet there may be no
1643          * csums to copy, ergo we won't have csums for our file extent.  This is
1644          * ok so just don't bother checking csums if the inode belongs to the
1645          * data reloc tree.
1646          */
1647         if (disk_bytenr > 0 &&
1648             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1649                 u64 found;
1650                 if (btrfs_file_extent_compression(eb, fi))
1651                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1652                 else
1653                         disk_bytenr += extent_offset;
1654
1655                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1656                 if (ret < 0)
1657                         return ret;
1658                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1659                         if (found > 0)
1660                                 rec->found_csum_item = 1;
1661                         if (found < num_bytes)
1662                                 rec->some_csum_missing = 1;
1663                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1664                         if (found > 0)
1665                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1666                 }
1667         }
1668         return 0;
1669 }
1670
1671 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1672                             struct walk_control *wc)
1673 {
1674         struct btrfs_key key;
1675         u32 nritems;
1676         int i;
1677         int ret = 0;
1678         struct cache_tree *inode_cache;
1679         struct shared_node *active_node;
1680
1681         if (wc->root_level == wc->active_node &&
1682             btrfs_root_refs(&root->root_item) == 0)
1683                 return 0;
1684
1685         active_node = wc->nodes[wc->active_node];
1686         inode_cache = &active_node->inode_cache;
1687         nritems = btrfs_header_nritems(eb);
1688         for (i = 0; i < nritems; i++) {
1689                 btrfs_item_key_to_cpu(eb, &key, i);
1690
1691                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1692                         continue;
1693                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1694                         continue;
1695
1696                 if (active_node->current == NULL ||
1697                     active_node->current->ino < key.objectid) {
1698                         if (active_node->current) {
1699                                 active_node->current->checked = 1;
1700                                 maybe_free_inode_rec(inode_cache,
1701                                                      active_node->current);
1702                         }
1703                         active_node->current = get_inode_rec(inode_cache,
1704                                                              key.objectid, 1);
1705                         BUG_ON(IS_ERR(active_node->current));
1706                 }
1707                 switch (key.type) {
1708                 case BTRFS_DIR_ITEM_KEY:
1709                 case BTRFS_DIR_INDEX_KEY:
1710                         ret = process_dir_item(root, eb, i, &key, active_node);
1711                         break;
1712                 case BTRFS_INODE_REF_KEY:
1713                         ret = process_inode_ref(eb, i, &key, active_node);
1714                         break;
1715                 case BTRFS_INODE_EXTREF_KEY:
1716                         ret = process_inode_extref(eb, i, &key, active_node);
1717                         break;
1718                 case BTRFS_INODE_ITEM_KEY:
1719                         ret = process_inode_item(eb, i, &key, active_node);
1720                         break;
1721                 case BTRFS_EXTENT_DATA_KEY:
1722                         ret = process_file_extent(root, eb, i, &key,
1723                                                   active_node);
1724                         break;
1725                 default:
1726                         break;
1727                 };
1728         }
1729         return ret;
1730 }
1731
1732 static void reada_walk_down(struct btrfs_root *root,
1733                             struct extent_buffer *node, int slot)
1734 {
1735         u64 bytenr;
1736         u64 ptr_gen;
1737         u32 nritems;
1738         u32 blocksize;
1739         int i;
1740         int level;
1741
1742         level = btrfs_header_level(node);
1743         if (level != 1)
1744                 return;
1745
1746         nritems = btrfs_header_nritems(node);
1747         blocksize = btrfs_level_size(root, level - 1);
1748         for (i = slot; i < nritems; i++) {
1749                 bytenr = btrfs_node_blockptr(node, i);
1750                 ptr_gen = btrfs_node_ptr_generation(node, i);
1751                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1752         }
1753 }
1754
1755 /*
1756  * Check the child node/leaf by the following condition:
1757  * 1. the first item key of the node/leaf should be the same with the one
1758  *    in parent.
1759  * 2. block in parent node should match the child node/leaf.
1760  * 3. generation of parent node and child's header should be consistent.
1761  *
1762  * Or the child node/leaf pointed by the key in parent is not valid.
1763  *
1764  * We hope to check leaf owner too, but since subvol may share leaves,
1765  * which makes leaf owner check not so strong, key check should be
1766  * sufficient enough for that case.
1767  */
1768 static int check_child_node(struct btrfs_root *root,
1769                             struct extent_buffer *parent, int slot,
1770                             struct extent_buffer *child)
1771 {
1772         struct btrfs_key parent_key;
1773         struct btrfs_key child_key;
1774         int ret = 0;
1775
1776         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1777         if (btrfs_header_level(child) == 0)
1778                 btrfs_item_key_to_cpu(child, &child_key, 0);
1779         else
1780                 btrfs_node_key_to_cpu(child, &child_key, 0);
1781
1782         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1783                 ret = -EINVAL;
1784                 fprintf(stderr,
1785                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1786                         parent_key.objectid, parent_key.type, parent_key.offset,
1787                         child_key.objectid, child_key.type, child_key.offset);
1788         }
1789         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1790                 ret = -EINVAL;
1791                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1792                         btrfs_node_blockptr(parent, slot),
1793                         btrfs_header_bytenr(child));
1794         }
1795         if (btrfs_node_ptr_generation(parent, slot) !=
1796             btrfs_header_generation(child)) {
1797                 ret = -EINVAL;
1798                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1799                         btrfs_header_generation(child),
1800                         btrfs_node_ptr_generation(parent, slot));
1801         }
1802         return ret;
1803 }
1804
1805 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1806                           struct walk_control *wc, int *level)
1807 {
1808         enum btrfs_tree_block_status status;
1809         u64 bytenr;
1810         u64 ptr_gen;
1811         struct extent_buffer *next;
1812         struct extent_buffer *cur;
1813         u32 blocksize;
1814         int ret, err = 0;
1815         u64 refs;
1816
1817         WARN_ON(*level < 0);
1818         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1819         ret = btrfs_lookup_extent_info(NULL, root,
1820                                        path->nodes[*level]->start,
1821                                        *level, 1, &refs, NULL);
1822         if (ret < 0) {
1823                 err = ret;
1824                 goto out;
1825         }
1826
1827         if (refs > 1) {
1828                 ret = enter_shared_node(root, path->nodes[*level]->start,
1829                                         refs, wc, *level);
1830                 if (ret > 0) {
1831                         err = ret;
1832                         goto out;
1833                 }
1834         }
1835
1836         while (*level >= 0) {
1837                 WARN_ON(*level < 0);
1838                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1839                 cur = path->nodes[*level];
1840
1841                 if (btrfs_header_level(cur) != *level)
1842                         WARN_ON(1);
1843
1844                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1845                         break;
1846                 if (*level == 0) {
1847                         ret = process_one_leaf(root, cur, wc);
1848                         if (ret < 0)
1849                                 err = ret;
1850                         break;
1851                 }
1852                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1853                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1854                 blocksize = btrfs_level_size(root, *level - 1);
1855                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1856                                                1, &refs, NULL);
1857                 if (ret < 0)
1858                         refs = 0;
1859
1860                 if (refs > 1) {
1861                         ret = enter_shared_node(root, bytenr, refs,
1862                                                 wc, *level - 1);
1863                         if (ret > 0) {
1864                                 path->slots[*level]++;
1865                                 continue;
1866                         }
1867                 }
1868
1869                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1870                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1871                         free_extent_buffer(next);
1872                         reada_walk_down(root, cur, path->slots[*level]);
1873                         next = read_tree_block(root, bytenr, blocksize,
1874                                                ptr_gen);
1875                         if (!extent_buffer_uptodate(next)) {
1876                                 struct btrfs_key node_key;
1877
1878                                 btrfs_node_key_to_cpu(path->nodes[*level],
1879                                                       &node_key,
1880                                                       path->slots[*level]);
1881                                 btrfs_add_corrupt_extent_record(root->fs_info,
1882                                                 &node_key,
1883                                                 path->nodes[*level]->start,
1884                                                 root->leafsize, *level);
1885                                 err = -EIO;
1886                                 goto out;
1887                         }
1888                 }
1889
1890                 ret = check_child_node(root, cur, path->slots[*level], next);
1891                 if (ret) {
1892                         err = ret;
1893                         goto out;
1894                 }
1895
1896                 if (btrfs_is_leaf(next))
1897                         status = btrfs_check_leaf(root, NULL, next);
1898                 else
1899                         status = btrfs_check_node(root, NULL, next);
1900                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1901                         free_extent_buffer(next);
1902                         err = -EIO;
1903                         goto out;
1904                 }
1905
1906                 *level = *level - 1;
1907                 free_extent_buffer(path->nodes[*level]);
1908                 path->nodes[*level] = next;
1909                 path->slots[*level] = 0;
1910         }
1911 out:
1912         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1913         return err;
1914 }
1915
1916 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1917                         struct walk_control *wc, int *level)
1918 {
1919         int i;
1920         struct extent_buffer *leaf;
1921
1922         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1923                 leaf = path->nodes[i];
1924                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1925                         path->slots[i]++;
1926                         *level = i;
1927                         return 0;
1928                 } else {
1929                         free_extent_buffer(path->nodes[*level]);
1930                         path->nodes[*level] = NULL;
1931                         BUG_ON(*level > wc->active_node);
1932                         if (*level == wc->active_node)
1933                                 leave_shared_node(root, wc, *level);
1934                         *level = i + 1;
1935                 }
1936         }
1937         return 1;
1938 }
1939
1940 static int check_root_dir(struct inode_record *rec)
1941 {
1942         struct inode_backref *backref;
1943         int ret = -1;
1944
1945         if (!rec->found_inode_item || rec->errors)
1946                 goto out;
1947         if (rec->nlink != 1 || rec->found_link != 0)
1948                 goto out;
1949         if (list_empty(&rec->backrefs))
1950                 goto out;
1951         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1952         if (!backref->found_inode_ref)
1953                 goto out;
1954         if (backref->index != 0 || backref->namelen != 2 ||
1955             memcmp(backref->name, "..", 2))
1956                 goto out;
1957         if (backref->found_dir_index || backref->found_dir_item)
1958                 goto out;
1959         ret = 0;
1960 out:
1961         return ret;
1962 }
1963
1964 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1965                               struct btrfs_root *root, struct btrfs_path *path,
1966                               struct inode_record *rec)
1967 {
1968         struct btrfs_inode_item *ei;
1969         struct btrfs_key key;
1970         int ret;
1971
1972         key.objectid = rec->ino;
1973         key.type = BTRFS_INODE_ITEM_KEY;
1974         key.offset = (u64)-1;
1975
1976         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1977         if (ret < 0)
1978                 goto out;
1979         if (ret) {
1980                 if (!path->slots[0]) {
1981                         ret = -ENOENT;
1982                         goto out;
1983                 }
1984                 path->slots[0]--;
1985                 ret = 0;
1986         }
1987         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1988         if (key.objectid != rec->ino) {
1989                 ret = -ENOENT;
1990                 goto out;
1991         }
1992
1993         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1994                             struct btrfs_inode_item);
1995         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1996         btrfs_mark_buffer_dirty(path->nodes[0]);
1997         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1998         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
1999                root->root_key.objectid);
2000 out:
2001         btrfs_release_path(path);
2002         return ret;
2003 }
2004
2005 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2006                                     struct btrfs_root *root,
2007                                     struct btrfs_path *path,
2008                                     struct inode_record *rec)
2009 {
2010         int ret;
2011
2012         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2013         btrfs_release_path(path);
2014         if (!ret)
2015                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2016         return ret;
2017 }
2018
2019 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2020                                struct btrfs_root *root,
2021                                struct btrfs_path *path,
2022                                struct inode_record *rec)
2023 {
2024         struct btrfs_inode_item *ei;
2025         struct btrfs_key key;
2026         int ret = 0;
2027
2028         key.objectid = rec->ino;
2029         key.type = BTRFS_INODE_ITEM_KEY;
2030         key.offset = 0;
2031
2032         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2033         if (ret) {
2034                 if (ret > 0)
2035                         ret = -ENOENT;
2036                 goto out;
2037         }
2038
2039         /* Since ret == 0, no need to check anything */
2040         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2041                             struct btrfs_inode_item);
2042         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2043         btrfs_mark_buffer_dirty(path->nodes[0]);
2044         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2045         printf("reset nbytes for ino %llu root %llu\n",
2046                rec->ino, root->root_key.objectid);
2047 out:
2048         btrfs_release_path(path);
2049         return ret;
2050 }
2051
2052 static int add_missing_dir_index(struct btrfs_root *root,
2053                                  struct cache_tree *inode_cache,
2054                                  struct inode_record *rec,
2055                                  struct inode_backref *backref)
2056 {
2057         struct btrfs_path *path;
2058         struct btrfs_trans_handle *trans;
2059         struct btrfs_dir_item *dir_item;
2060         struct extent_buffer *leaf;
2061         struct btrfs_key key;
2062         struct btrfs_disk_key disk_key;
2063         struct inode_record *dir_rec;
2064         unsigned long name_ptr;
2065         u32 data_size = sizeof(*dir_item) + backref->namelen;
2066         int ret;
2067
2068         path = btrfs_alloc_path();
2069         if (!path)
2070                 return -ENOMEM;
2071
2072         trans = btrfs_start_transaction(root, 1);
2073         if (IS_ERR(trans)) {
2074                 btrfs_free_path(path);
2075                 return PTR_ERR(trans);
2076         }
2077
2078         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2079                 (unsigned long long)rec->ino);
2080         key.objectid = backref->dir;
2081         key.type = BTRFS_DIR_INDEX_KEY;
2082         key.offset = backref->index;
2083
2084         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2085         BUG_ON(ret);
2086
2087         leaf = path->nodes[0];
2088         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2089
2090         disk_key.objectid = cpu_to_le64(rec->ino);
2091         disk_key.type = BTRFS_INODE_ITEM_KEY;
2092         disk_key.offset = 0;
2093
2094         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2095         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2096         btrfs_set_dir_data_len(leaf, dir_item, 0);
2097         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2098         name_ptr = (unsigned long)(dir_item + 1);
2099         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2100         btrfs_mark_buffer_dirty(leaf);
2101         btrfs_free_path(path);
2102         btrfs_commit_transaction(trans, root);
2103
2104         backref->found_dir_index = 1;
2105         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2106         BUG_ON(IS_ERR(dir_rec));
2107         if (!dir_rec)
2108                 return 0;
2109         dir_rec->found_size += backref->namelen;
2110         if (dir_rec->found_size == dir_rec->isize &&
2111             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2112                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2113         if (dir_rec->found_size != dir_rec->isize)
2114                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2115
2116         return 0;
2117 }
2118
2119 static int delete_dir_index(struct btrfs_root *root,
2120                             struct cache_tree *inode_cache,
2121                             struct inode_record *rec,
2122                             struct inode_backref *backref)
2123 {
2124         struct btrfs_trans_handle *trans;
2125         struct btrfs_dir_item *di;
2126         struct btrfs_path *path;
2127         int ret = 0;
2128
2129         path = btrfs_alloc_path();
2130         if (!path)
2131                 return -ENOMEM;
2132
2133         trans = btrfs_start_transaction(root, 1);
2134         if (IS_ERR(trans)) {
2135                 btrfs_free_path(path);
2136                 return PTR_ERR(trans);
2137         }
2138
2139
2140         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2141                 (unsigned long long)backref->dir,
2142                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2143                 (unsigned long long)root->objectid);
2144
2145         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2146                                     backref->name, backref->namelen,
2147                                     backref->index, -1);
2148         if (IS_ERR(di)) {
2149                 ret = PTR_ERR(di);
2150                 btrfs_free_path(path);
2151                 btrfs_commit_transaction(trans, root);
2152                 if (ret == -ENOENT)
2153                         return 0;
2154                 return ret;
2155         }
2156
2157         if (!di)
2158                 ret = btrfs_del_item(trans, root, path);
2159         else
2160                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2161         BUG_ON(ret);
2162         btrfs_free_path(path);
2163         btrfs_commit_transaction(trans, root);
2164         return ret;
2165 }
2166
2167 static int create_inode_item(struct btrfs_root *root,
2168                              struct inode_record *rec,
2169                              struct inode_backref *backref, int root_dir)
2170 {
2171         struct btrfs_trans_handle *trans;
2172         struct btrfs_inode_item inode_item;
2173         time_t now = time(NULL);
2174         int ret;
2175
2176         trans = btrfs_start_transaction(root, 1);
2177         if (IS_ERR(trans)) {
2178                 ret = PTR_ERR(trans);
2179                 return ret;
2180         }
2181
2182         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2183                 "be incomplete, please check permissions and content after "
2184                 "the fsck completes.\n", (unsigned long long)root->objectid,
2185                 (unsigned long long)rec->ino);
2186
2187         memset(&inode_item, 0, sizeof(inode_item));
2188         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2189         if (root_dir)
2190                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2191         else
2192                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2193         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2194         if (rec->found_dir_item) {
2195                 if (rec->found_file_extent)
2196                         fprintf(stderr, "root %llu inode %llu has both a dir "
2197                                 "item and extents, unsure if it is a dir or a "
2198                                 "regular file so setting it as a directory\n",
2199                                 (unsigned long long)root->objectid,
2200                                 (unsigned long long)rec->ino);
2201                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2202                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2203         } else if (!rec->found_dir_item) {
2204                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2205                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2206         }
2207         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2208         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2209         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2210         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2211         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2212         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2213         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2214         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2215
2216         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2217         BUG_ON(ret);
2218         btrfs_commit_transaction(trans, root);
2219         return 0;
2220 }
2221
2222 static int repair_inode_backrefs(struct btrfs_root *root,
2223                                  struct inode_record *rec,
2224                                  struct cache_tree *inode_cache,
2225                                  int delete)
2226 {
2227         struct inode_backref *tmp, *backref;
2228         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2229         int ret = 0;
2230         int repaired = 0;
2231
2232         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2233                 if (!delete && rec->ino == root_dirid) {
2234                         if (!rec->found_inode_item) {
2235                                 ret = create_inode_item(root, rec, backref, 1);
2236                                 if (ret)
2237                                         break;
2238                                 repaired++;
2239                         }
2240                 }
2241
2242                 /* Index 0 for root dir's are special, don't mess with it */
2243                 if (rec->ino == root_dirid && backref->index == 0)
2244                         continue;
2245
2246                 if (delete &&
2247                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2248                      (backref->found_dir_index && backref->found_inode_ref &&
2249                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2250                         ret = delete_dir_index(root, inode_cache, rec, backref);
2251                         if (ret)
2252                                 break;
2253                         repaired++;
2254                         list_del(&backref->list);
2255                         free(backref);
2256                 }
2257
2258                 if (!delete && !backref->found_dir_index &&
2259                     backref->found_dir_item && backref->found_inode_ref) {
2260                         ret = add_missing_dir_index(root, inode_cache, rec,
2261                                                     backref);
2262                         if (ret)
2263                                 break;
2264                         repaired++;
2265                         if (backref->found_dir_item &&
2266                             backref->found_dir_index &&
2267                             backref->found_dir_index) {
2268                                 if (!backref->errors &&
2269                                     backref->found_inode_ref) {
2270                                         list_del(&backref->list);
2271                                         free(backref);
2272                                 }
2273                         }
2274                 }
2275
2276                 if (!delete && (!backref->found_dir_index &&
2277                                 !backref->found_dir_item &&
2278                                 backref->found_inode_ref)) {
2279                         struct btrfs_trans_handle *trans;
2280                         struct btrfs_key location;
2281
2282                         ret = check_dir_conflict(root, backref->name,
2283                                                  backref->namelen,
2284                                                  backref->dir,
2285                                                  backref->index);
2286                         if (ret) {
2287                                 /*
2288                                  * let nlink fixing routine to handle it,
2289                                  * which can do it better.
2290                                  */
2291                                 ret = 0;
2292                                 break;
2293                         }
2294                         location.objectid = rec->ino;
2295                         location.type = BTRFS_INODE_ITEM_KEY;
2296                         location.offset = 0;
2297
2298                         trans = btrfs_start_transaction(root, 1);
2299                         if (IS_ERR(trans)) {
2300                                 ret = PTR_ERR(trans);
2301                                 break;
2302                         }
2303                         fprintf(stderr, "adding missing dir index/item pair "
2304                                 "for inode %llu\n",
2305                                 (unsigned long long)rec->ino);
2306                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2307                                                     backref->namelen,
2308                                                     backref->dir, &location,
2309                                                     imode_to_type(rec->imode),
2310                                                     backref->index);
2311                         BUG_ON(ret);
2312                         btrfs_commit_transaction(trans, root);
2313                         repaired++;
2314                 }
2315
2316                 if (!delete && (backref->found_inode_ref &&
2317                                 backref->found_dir_index &&
2318                                 backref->found_dir_item &&
2319                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2320                                 !rec->found_inode_item)) {
2321                         ret = create_inode_item(root, rec, backref, 0);
2322                         if (ret)
2323                                 break;
2324                         repaired++;
2325                 }
2326
2327         }
2328         return ret ? ret : repaired;
2329 }
2330
2331 /*
2332  * To determine the file type for nlink/inode_item repair
2333  *
2334  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2335  * Return -ENOENT if file type is not found.
2336  */
2337 static int find_file_type(struct inode_record *rec, u8 *type)
2338 {
2339         struct inode_backref *backref;
2340
2341         /* For inode item recovered case */
2342         if (rec->found_inode_item) {
2343                 *type = imode_to_type(rec->imode);
2344                 return 0;
2345         }
2346
2347         list_for_each_entry(backref, &rec->backrefs, list) {
2348                 if (backref->found_dir_index || backref->found_dir_item) {
2349                         *type = backref->filetype;
2350                         return 0;
2351                 }
2352         }
2353         return -ENOENT;
2354 }
2355
2356 /*
2357  * To determine the file name for nlink repair
2358  *
2359  * Return 0 if file name is found, set name and namelen.
2360  * Return -ENOENT if file name is not found.
2361  */
2362 static int find_file_name(struct inode_record *rec,
2363                           char *name, int *namelen)
2364 {
2365         struct inode_backref *backref;
2366
2367         list_for_each_entry(backref, &rec->backrefs, list) {
2368                 if (backref->found_dir_index || backref->found_dir_item ||
2369                     backref->found_inode_ref) {
2370                         memcpy(name, backref->name, backref->namelen);
2371                         *namelen = backref->namelen;
2372                         return 0;
2373                 }
2374         }
2375         return -ENOENT;
2376 }
2377
2378 /* Reset the nlink of the inode to the correct one */
2379 static int reset_nlink(struct btrfs_trans_handle *trans,
2380                        struct btrfs_root *root,
2381                        struct btrfs_path *path,
2382                        struct inode_record *rec)
2383 {
2384         struct inode_backref *backref;
2385         struct inode_backref *tmp;
2386         struct btrfs_key key;
2387         struct btrfs_inode_item *inode_item;
2388         int ret = 0;
2389
2390         /* We don't believe this either, reset it and iterate backref */
2391         rec->found_link = 0;
2392
2393         /* Remove all backref including the valid ones */
2394         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2395                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2396                                    backref->index, backref->name,
2397                                    backref->namelen, 0);
2398                 if (ret < 0)
2399                         goto out;
2400
2401                 /* remove invalid backref, so it won't be added back */
2402                 if (!(backref->found_dir_index &&
2403                       backref->found_dir_item &&
2404                       backref->found_inode_ref)) {
2405                         list_del(&backref->list);
2406                         free(backref);
2407                 } else {
2408                         rec->found_link++;
2409                 }
2410         }
2411
2412         /* Set nlink to 0 */
2413         key.objectid = rec->ino;
2414         key.type = BTRFS_INODE_ITEM_KEY;
2415         key.offset = 0;
2416         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2417         if (ret < 0)
2418                 goto out;
2419         if (ret > 0) {
2420                 ret = -ENOENT;
2421                 goto out;
2422         }
2423         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2424                                     struct btrfs_inode_item);
2425         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2426         btrfs_mark_buffer_dirty(path->nodes[0]);
2427         btrfs_release_path(path);
2428
2429         /*
2430          * Add back valid inode_ref/dir_item/dir_index,
2431          * add_link() will handle the nlink inc, so new nlink must be correct
2432          */
2433         list_for_each_entry(backref, &rec->backrefs, list) {
2434                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2435                                      backref->name, backref->namelen,
2436                                      backref->filetype, &backref->index, 1);
2437                 if (ret < 0)
2438                         goto out;
2439         }
2440 out:
2441         btrfs_release_path(path);
2442         return ret;
2443 }
2444
2445 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2446                                struct btrfs_root *root,
2447                                struct btrfs_path *path,
2448                                struct inode_record *rec)
2449 {
2450         char *dir_name = "lost+found";
2451         char namebuf[BTRFS_NAME_LEN] = {0};
2452         u64 lost_found_ino;
2453         u32 mode = 0700;
2454         u8 type = 0;
2455         int namelen = 0;
2456         int name_recovered = 0;
2457         int type_recovered = 0;
2458         int ret = 0;
2459
2460         /*
2461          * Get file name and type first before these invalid inode ref
2462          * are deleted by remove_all_invalid_backref()
2463          */
2464         name_recovered = !find_file_name(rec, namebuf, &namelen);
2465         type_recovered = !find_file_type(rec, &type);
2466
2467         if (!name_recovered) {
2468                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2469                        rec->ino, rec->ino);
2470                 namelen = count_digits(rec->ino);
2471                 sprintf(namebuf, "%llu", rec->ino);
2472                 name_recovered = 1;
2473         }
2474         if (!type_recovered) {
2475                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2476                        rec->ino);
2477                 type = BTRFS_FT_REG_FILE;
2478                 type_recovered = 1;
2479         }
2480
2481         ret = reset_nlink(trans, root, path, rec);
2482         if (ret < 0) {
2483                 fprintf(stderr,
2484                         "Failed to reset nlink for inode %llu: %s\n",
2485                         rec->ino, strerror(-ret));
2486                 goto out;
2487         }
2488
2489         if (rec->found_link == 0) {
2490                 lost_found_ino = root->highest_inode;
2491                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2492                         ret = -EOVERFLOW;
2493                         goto out;
2494                 }
2495                 lost_found_ino++;
2496                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2497                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2498                                   mode);
2499                 if (ret < 0) {
2500                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2501                                 dir_name, strerror(-ret));
2502                         goto out;
2503                 }
2504                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2505                                      namebuf, namelen, type, NULL, 1);
2506                 /*
2507                  * Add ".INO" suffix several times to handle case where
2508                  * "FILENAME.INO" is already taken by another file.
2509                  */
2510                 while (ret == -EEXIST) {
2511                         /*
2512                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2513                          */
2514                         if (namelen + count_digits(rec->ino) + 1 >
2515                             BTRFS_NAME_LEN) {
2516                                 ret = -EFBIG;
2517                                 goto out;
2518                         }
2519                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2520                                  ".%llu", rec->ino);
2521                         namelen += count_digits(rec->ino) + 1;
2522                         ret = btrfs_add_link(trans, root, rec->ino,
2523                                              lost_found_ino, namebuf,
2524                                              namelen, type, NULL, 1);
2525                 }
2526                 if (ret < 0) {
2527                         fprintf(stderr,
2528                                 "Failed to link the inode %llu to %s dir: %s\n",
2529                                 rec->ino, dir_name, strerror(-ret));
2530                         goto out;
2531                 }
2532                 /*
2533                  * Just increase the found_link, don't actually add the
2534                  * backref. This will make things easier and this inode
2535                  * record will be freed after the repair is done.
2536                  * So fsck will not report problem about this inode.
2537                  */
2538                 rec->found_link++;
2539                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2540                        namelen, namebuf, dir_name);
2541         }
2542         printf("Fixed the nlink of inode %llu\n", rec->ino);
2543 out:
2544         /*
2545          * Clear the flag anyway, or we will loop forever for the same inode
2546          * as it will not be removed from the bad inode list and the dead loop
2547          * happens.
2548          */
2549         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2550         btrfs_release_path(path);
2551         return ret;
2552 }
2553
2554 /*
2555  * Check if there is any normal(reg or prealloc) file extent for given
2556  * ino.
2557  * This is used to determine the file type when neither its dir_index/item or
2558  * inode_item exists.
2559  *
2560  * This will *NOT* report error, if any error happens, just consider it does
2561  * not have any normal file extent.
2562  */
2563 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2564 {
2565         struct btrfs_path *path;
2566         struct btrfs_key key;
2567         struct btrfs_key found_key;
2568         struct btrfs_file_extent_item *fi;
2569         u8 type;
2570         int ret = 0;
2571
2572         path = btrfs_alloc_path();
2573         if (!path)
2574                 goto out;
2575         key.objectid = ino;
2576         key.type = BTRFS_EXTENT_DATA_KEY;
2577         key.offset = 0;
2578
2579         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2580         if (ret < 0) {
2581                 ret = 0;
2582                 goto out;
2583         }
2584         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2585                 ret = btrfs_next_leaf(root, path);
2586                 if (ret) {
2587                         ret = 0;
2588                         goto out;
2589                 }
2590         }
2591         while (1) {
2592                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2593                                       path->slots[0]);
2594                 if (found_key.objectid != ino ||
2595                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2596                         break;
2597                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2598                                     struct btrfs_file_extent_item);
2599                 type = btrfs_file_extent_type(path->nodes[0], fi);
2600                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2601                         ret = 1;
2602                         goto out;
2603                 }
2604         }
2605 out:
2606         btrfs_free_path(path);
2607         return ret;
2608 }
2609
2610 static u32 btrfs_type_to_imode(u8 type)
2611 {
2612         static u32 imode_by_btrfs_type[] = {
2613                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2614                 [BTRFS_FT_DIR]          = S_IFDIR,
2615                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2616                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2617                 [BTRFS_FT_FIFO]         = S_IFIFO,
2618                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2619                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2620         };
2621
2622         return imode_by_btrfs_type[(type)];
2623 }
2624
2625 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2626                                 struct btrfs_root *root,
2627                                 struct btrfs_path *path,
2628                                 struct inode_record *rec)
2629 {
2630         u8 filetype;
2631         u32 mode = 0700;
2632         int type_recovered = 0;
2633         int ret = 0;
2634
2635         printf("Trying to rebuild inode:%llu\n", rec->ino);
2636
2637         type_recovered = !find_file_type(rec, &filetype);
2638
2639         /*
2640          * Try to determine inode type if type not found.
2641          *
2642          * For found regular file extent, it must be FILE.
2643          * For found dir_item/index, it must be DIR.
2644          *
2645          * For undetermined one, use FILE as fallback.
2646          *
2647          * TODO:
2648          * 1. If found backref(inode_index/item is already handled) to it,
2649          *    it must be DIR.
2650          *    Need new inode-inode ref structure to allow search for that.
2651          */
2652         if (!type_recovered) {
2653                 if (rec->found_file_extent &&
2654                     find_normal_file_extent(root, rec->ino)) {
2655                         type_recovered = 1;
2656                         filetype = BTRFS_FT_REG_FILE;
2657                 } else if (rec->found_dir_item) {
2658                         type_recovered = 1;
2659                         filetype = BTRFS_FT_DIR;
2660                 } else if (!list_empty(&rec->orphan_extents)) {
2661                         type_recovered = 1;
2662                         filetype = BTRFS_FT_REG_FILE;
2663                 } else{
2664                         printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2665                                rec->ino);
2666                         type_recovered = 1;
2667                         filetype = BTRFS_FT_REG_FILE;
2668                 }
2669         }
2670
2671         ret = btrfs_new_inode(trans, root, rec->ino,
2672                               mode | btrfs_type_to_imode(filetype));
2673         if (ret < 0)
2674                 goto out;
2675
2676         /*
2677          * Here inode rebuild is done, we only rebuild the inode item,
2678          * don't repair the nlink(like move to lost+found).
2679          * That is the job of nlink repair.
2680          *
2681          * We just fill the record and return
2682          */
2683         rec->found_dir_item = 1;
2684         rec->imode = mode | btrfs_type_to_imode(filetype);
2685         rec->nlink = 0;
2686         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2687         /* Ensure the inode_nlinks repair function will be called */
2688         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2689 out:
2690         return ret;
2691 }
2692
2693 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2694                                       struct btrfs_root *root,
2695                                       struct btrfs_path *path,
2696                                       struct inode_record *rec)
2697 {
2698         struct orphan_data_extent *orphan;
2699         struct orphan_data_extent *tmp;
2700         int ret = 0;
2701
2702         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2703                 /*
2704                  * Check for conflicting file extents
2705                  *
2706                  * Here we don't know whether the extents is compressed or not,
2707                  * so we can only assume it not compressed nor data offset,
2708                  * and use its disk_len as extent length.
2709                  */
2710                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2711                                        orphan->offset, orphan->disk_len, 0);
2712                 btrfs_release_path(path);
2713                 if (ret < 0)
2714                         goto out;
2715                 if (!ret) {
2716                         fprintf(stderr,
2717                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2718                                 orphan->disk_bytenr, orphan->disk_len);
2719                         ret = btrfs_free_extent(trans,
2720                                         root->fs_info->extent_root,
2721                                         orphan->disk_bytenr, orphan->disk_len,
2722                                         0, root->objectid, orphan->objectid,
2723                                         orphan->offset);
2724                         if (ret < 0)
2725                                 goto out;
2726                 }
2727                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2728                                 orphan->offset, orphan->disk_bytenr,
2729                                 orphan->disk_len, orphan->disk_len);
2730                 if (ret < 0)
2731                         goto out;
2732
2733                 /* Update file size info */
2734                 rec->found_size += orphan->disk_len;
2735                 if (rec->found_size == rec->nbytes)
2736                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2737
2738                 /* Update the file extent hole info too */
2739                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2740                                            orphan->disk_len);
2741                 if (ret < 0)
2742                         goto out;
2743                 if (RB_EMPTY_ROOT(&rec->holes))
2744                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2745
2746                 list_del(&orphan->list);
2747                 free(orphan);
2748         }
2749         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2750 out:
2751         return ret;
2752 }
2753
2754 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2755                                         struct btrfs_root *root,
2756                                         struct btrfs_path *path,
2757                                         struct inode_record *rec)
2758 {
2759         struct rb_node *node;
2760         struct file_extent_hole *hole;
2761         int found = 0;
2762         int ret = 0;
2763
2764         node = rb_first(&rec->holes);
2765
2766         while (node) {
2767                 found = 1;
2768                 hole = rb_entry(node, struct file_extent_hole, node);
2769                 ret = btrfs_punch_hole(trans, root, rec->ino,
2770                                        hole->start, hole->len);
2771                 if (ret < 0)
2772                         goto out;
2773                 ret = del_file_extent_hole(&rec->holes, hole->start,
2774                                            hole->len);
2775                 if (ret < 0)
2776                         goto out;
2777                 if (RB_EMPTY_ROOT(&rec->holes))
2778                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2779                 node = rb_first(&rec->holes);
2780         }
2781         /* special case for a file losing all its file extent */
2782         if (!found) {
2783                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2784                                        round_up(rec->isize, root->sectorsize));
2785                 if (ret < 0)
2786                         goto out;
2787         }
2788         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2789                rec->ino, root->objectid);
2790 out:
2791         return ret;
2792 }
2793
2794 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2795 {
2796         struct btrfs_trans_handle *trans;
2797         struct btrfs_path *path;
2798         int ret = 0;
2799
2800         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2801                              I_ERR_NO_ORPHAN_ITEM |
2802                              I_ERR_LINK_COUNT_WRONG |
2803                              I_ERR_NO_INODE_ITEM |
2804                              I_ERR_FILE_EXTENT_ORPHAN |
2805                              I_ERR_FILE_EXTENT_DISCOUNT|
2806                              I_ERR_FILE_NBYTES_WRONG)))
2807                 return rec->errors;
2808
2809         path = btrfs_alloc_path();
2810         if (!path)
2811                 return -ENOMEM;
2812
2813         /*
2814          * For nlink repair, it may create a dir and add link, so
2815          * 2 for parent(256)'s dir_index and dir_item
2816          * 2 for lost+found dir's inode_item and inode_ref
2817          * 1 for the new inode_ref of the file
2818          * 2 for lost+found dir's dir_index and dir_item for the file
2819          */
2820         trans = btrfs_start_transaction(root, 7);
2821         if (IS_ERR(trans)) {
2822                 btrfs_free_path(path);
2823                 return PTR_ERR(trans);
2824         }
2825
2826         if (rec->errors & I_ERR_NO_INODE_ITEM)
2827                 ret = repair_inode_no_item(trans, root, path, rec);
2828         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2829                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2830         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2831                 ret = repair_inode_discount_extent(trans, root, path, rec);
2832         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2833                 ret = repair_inode_isize(trans, root, path, rec);
2834         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2835                 ret = repair_inode_orphan_item(trans, root, path, rec);
2836         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2837                 ret = repair_inode_nlinks(trans, root, path, rec);
2838         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2839                 ret = repair_inode_nbytes(trans, root, path, rec);
2840         btrfs_commit_transaction(trans, root);
2841         btrfs_free_path(path);
2842         return ret;
2843 }
2844
2845 static int check_inode_recs(struct btrfs_root *root,
2846                             struct cache_tree *inode_cache)
2847 {
2848         struct cache_extent *cache;
2849         struct ptr_node *node;
2850         struct inode_record *rec;
2851         struct inode_backref *backref;
2852         int stage = 0;
2853         int ret = 0;
2854         int err = 0;
2855         u64 error = 0;
2856         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2857
2858         if (btrfs_root_refs(&root->root_item) == 0) {
2859                 if (!cache_tree_empty(inode_cache))
2860                         fprintf(stderr, "warning line %d\n", __LINE__);
2861                 return 0;
2862         }
2863
2864         /*
2865          * We need to record the highest inode number for later 'lost+found'
2866          * dir creation.
2867          * We must select a ino not used/refered by any existing inode, or
2868          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2869          * this may cause 'lost+found' dir has wrong nlinks.
2870          */
2871         cache = last_cache_extent(inode_cache);
2872         if (cache) {
2873                 node = container_of(cache, struct ptr_node, cache);
2874                 rec = node->data;
2875                 if (rec->ino > root->highest_inode)
2876                         root->highest_inode = rec->ino;
2877         }
2878
2879         /*
2880          * We need to repair backrefs first because we could change some of the
2881          * errors in the inode recs.
2882          *
2883          * We also need to go through and delete invalid backrefs first and then
2884          * add the correct ones second.  We do this because we may get EEXIST
2885          * when adding back the correct index because we hadn't yet deleted the
2886          * invalid index.
2887          *
2888          * For example, if we were missing a dir index then the directories
2889          * isize would be wrong, so if we fixed the isize to what we thought it
2890          * would be and then fixed the backref we'd still have a invalid fs, so
2891          * we need to add back the dir index and then check to see if the isize
2892          * is still wrong.
2893          */
2894         while (stage < 3) {
2895                 stage++;
2896                 if (stage == 3 && !err)
2897                         break;
2898
2899                 cache = search_cache_extent(inode_cache, 0);
2900                 while (repair && cache) {
2901                         node = container_of(cache, struct ptr_node, cache);
2902                         rec = node->data;
2903                         cache = next_cache_extent(cache);
2904
2905                         /* Need to free everything up and rescan */
2906                         if (stage == 3) {
2907                                 remove_cache_extent(inode_cache, &node->cache);
2908                                 free(node);
2909                                 free_inode_rec(rec);
2910                                 continue;
2911                         }
2912
2913                         if (list_empty(&rec->backrefs))
2914                                 continue;
2915
2916                         ret = repair_inode_backrefs(root, rec, inode_cache,
2917                                                     stage == 1);
2918                         if (ret < 0) {
2919                                 err = ret;
2920                                 stage = 2;
2921                                 break;
2922                         } if (ret > 0) {
2923                                 err = -EAGAIN;
2924                         }
2925                 }
2926         }
2927         if (err)
2928                 return err;
2929
2930         rec = get_inode_rec(inode_cache, root_dirid, 0);
2931         BUG_ON(IS_ERR(rec));
2932         if (rec) {
2933                 ret = check_root_dir(rec);
2934                 if (ret) {
2935                         fprintf(stderr, "root %llu root dir %llu error\n",
2936                                 (unsigned long long)root->root_key.objectid,
2937                                 (unsigned long long)root_dirid);
2938                         print_inode_error(root, rec);
2939                         error++;
2940                 }
2941         } else {
2942                 if (repair) {
2943                         struct btrfs_trans_handle *trans;
2944
2945                         trans = btrfs_start_transaction(root, 1);
2946                         if (IS_ERR(trans)) {
2947                                 err = PTR_ERR(trans);
2948                                 return err;
2949                         }
2950
2951                         fprintf(stderr,
2952                                 "root %llu missing its root dir, recreating\n",
2953                                 (unsigned long long)root->objectid);
2954
2955                         ret = btrfs_make_root_dir(trans, root, root_dirid);
2956                         BUG_ON(ret);
2957
2958                         btrfs_commit_transaction(trans, root);
2959                         return -EAGAIN;
2960                 }
2961
2962                 fprintf(stderr, "root %llu root dir %llu not found\n",
2963                         (unsigned long long)root->root_key.objectid,
2964                         (unsigned long long)root_dirid);
2965         }
2966
2967         while (1) {
2968                 cache = search_cache_extent(inode_cache, 0);
2969                 if (!cache)
2970                         break;
2971                 node = container_of(cache, struct ptr_node, cache);
2972                 rec = node->data;
2973                 remove_cache_extent(inode_cache, &node->cache);
2974                 free(node);
2975                 if (rec->ino == root_dirid ||
2976                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
2977                         free_inode_rec(rec);
2978                         continue;
2979                 }
2980
2981                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2982                         ret = check_orphan_item(root, rec->ino);
2983                         if (ret == 0)
2984                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2985                         if (can_free_inode_rec(rec)) {
2986                                 free_inode_rec(rec);
2987                                 continue;
2988                         }
2989                 }
2990
2991                 if (!rec->found_inode_item)
2992                         rec->errors |= I_ERR_NO_INODE_ITEM;
2993                 if (rec->found_link != rec->nlink)
2994                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2995                 if (repair) {
2996                         ret = try_repair_inode(root, rec);
2997                         if (ret == 0 && can_free_inode_rec(rec)) {
2998                                 free_inode_rec(rec);
2999                                 continue;
3000                         }
3001                         ret = 0;
3002                 }
3003
3004                 if (!(repair && ret == 0))
3005                         error++;
3006                 print_inode_error(root, rec);
3007                 list_for_each_entry(backref, &rec->backrefs, list) {
3008                         if (!backref->found_dir_item)
3009                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3010                         if (!backref->found_dir_index)
3011                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3012                         if (!backref->found_inode_ref)
3013                                 backref->errors |= REF_ERR_NO_INODE_REF;
3014                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3015                                 " namelen %u name %s filetype %d errors %x",
3016                                 (unsigned long long)backref->dir,
3017                                 (unsigned long long)backref->index,
3018                                 backref->namelen, backref->name,
3019                                 backref->filetype, backref->errors);
3020                         print_ref_error(backref->errors);
3021                 }
3022                 free_inode_rec(rec);
3023         }
3024         return (error > 0) ? -1 : 0;
3025 }
3026
3027 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3028                                         u64 objectid)
3029 {
3030         struct cache_extent *cache;
3031         struct root_record *rec = NULL;
3032         int ret;
3033
3034         cache = lookup_cache_extent(root_cache, objectid, 1);
3035         if (cache) {
3036                 rec = container_of(cache, struct root_record, cache);
3037         } else {
3038                 rec = calloc(1, sizeof(*rec));
3039                 rec->objectid = objectid;
3040                 INIT_LIST_HEAD(&rec->backrefs);
3041                 rec->cache.start = objectid;
3042                 rec->cache.size = 1;
3043
3044                 ret = insert_cache_extent(root_cache, &rec->cache);
3045                 BUG_ON(ret);
3046         }
3047         return rec;
3048 }
3049
3050 static struct root_backref *get_root_backref(struct root_record *rec,
3051                                              u64 ref_root, u64 dir, u64 index,
3052                                              const char *name, int namelen)
3053 {
3054         struct root_backref *backref;
3055
3056         list_for_each_entry(backref, &rec->backrefs, list) {
3057                 if (backref->ref_root != ref_root || backref->dir != dir ||
3058                     backref->namelen != namelen)
3059                         continue;
3060                 if (memcmp(name, backref->name, namelen))
3061                         continue;
3062                 return backref;
3063         }
3064
3065         backref = calloc(1, sizeof(*backref) + namelen + 1);
3066         backref->ref_root = ref_root;
3067         backref->dir = dir;
3068         backref->index = index;
3069         backref->namelen = namelen;
3070         memcpy(backref->name, name, namelen);
3071         backref->name[namelen] = '\0';
3072         list_add_tail(&backref->list, &rec->backrefs);
3073         return backref;
3074 }
3075
3076 static void free_root_record(struct cache_extent *cache)
3077 {
3078         struct root_record *rec;
3079         struct root_backref *backref;
3080
3081         rec = container_of(cache, struct root_record, cache);
3082         while (!list_empty(&rec->backrefs)) {
3083                 backref = list_entry(rec->backrefs.next,
3084                                      struct root_backref, list);
3085                 list_del(&backref->list);
3086                 free(backref);
3087         }
3088
3089         kfree(rec);
3090 }
3091
3092 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3093
3094 static int add_root_backref(struct cache_tree *root_cache,
3095                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3096                             const char *name, int namelen,
3097                             int item_type, int errors)
3098 {
3099         struct root_record *rec;
3100         struct root_backref *backref;
3101
3102         rec = get_root_rec(root_cache, root_id);
3103         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3104
3105         backref->errors |= errors;
3106
3107         if (item_type != BTRFS_DIR_ITEM_KEY) {
3108                 if (backref->found_dir_index || backref->found_back_ref ||
3109                     backref->found_forward_ref) {
3110                         if (backref->index != index)
3111                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3112                 } else {
3113                         backref->index = index;
3114                 }
3115         }
3116
3117         if (item_type == BTRFS_DIR_ITEM_KEY) {
3118                 if (backref->found_forward_ref)
3119                         rec->found_ref++;
3120                 backref->found_dir_item = 1;
3121         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3122                 backref->found_dir_index = 1;
3123         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3124                 if (backref->found_forward_ref)
3125                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3126                 else if (backref->found_dir_item)
3127                         rec->found_ref++;
3128                 backref->found_forward_ref = 1;
3129         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3130                 if (backref->found_back_ref)
3131                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3132                 backref->found_back_ref = 1;
3133         } else {
3134                 BUG_ON(1);
3135         }
3136
3137         if (backref->found_forward_ref && backref->found_dir_item)
3138                 backref->reachable = 1;
3139         return 0;
3140 }
3141
3142 static int merge_root_recs(struct btrfs_root *root,
3143                            struct cache_tree *src_cache,
3144                            struct cache_tree *dst_cache)
3145 {
3146         struct cache_extent *cache;
3147         struct ptr_node *node;
3148         struct inode_record *rec;
3149         struct inode_backref *backref;
3150         int ret = 0;
3151
3152         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3153                 free_inode_recs_tree(src_cache);
3154                 return 0;
3155         }
3156
3157         while (1) {
3158                 cache = search_cache_extent(src_cache, 0);
3159                 if (!cache)
3160                         break;
3161                 node = container_of(cache, struct ptr_node, cache);
3162                 rec = node->data;
3163                 remove_cache_extent(src_cache, &node->cache);
3164                 free(node);
3165
3166                 ret = is_child_root(root, root->objectid, rec->ino);
3167                 if (ret < 0)
3168                         break;
3169                 else if (ret == 0)
3170                         goto skip;
3171
3172                 list_for_each_entry(backref, &rec->backrefs, list) {
3173                         BUG_ON(backref->found_inode_ref);
3174                         if (backref->found_dir_item)
3175                                 add_root_backref(dst_cache, rec->ino,
3176                                         root->root_key.objectid, backref->dir,
3177                                         backref->index, backref->name,
3178                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3179                                         backref->errors);
3180                         if (backref->found_dir_index)
3181                                 add_root_backref(dst_cache, rec->ino,
3182                                         root->root_key.objectid, backref->dir,
3183                                         backref->index, backref->name,
3184                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3185                                         backref->errors);
3186                 }
3187 skip:
3188                 free_inode_rec(rec);
3189         }
3190         if (ret < 0)
3191                 return ret;
3192         return 0;
3193 }
3194
3195 static int check_root_refs(struct btrfs_root *root,
3196                            struct cache_tree *root_cache)
3197 {
3198         struct root_record *rec;
3199         struct root_record *ref_root;
3200         struct root_backref *backref;
3201         struct cache_extent *cache;
3202         int loop = 1;
3203         int ret;
3204         int error;
3205         int errors = 0;
3206
3207         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3208         rec->found_ref = 1;
3209
3210         /* fixme: this can not detect circular references */
3211         while (loop) {
3212                 loop = 0;
3213                 cache = search_cache_extent(root_cache, 0);
3214                 while (1) {
3215                         if (!cache)
3216                                 break;
3217                         rec = container_of(cache, struct root_record, cache);
3218                         cache = next_cache_extent(cache);
3219
3220                         if (rec->found_ref == 0)
3221                                 continue;
3222
3223                         list_for_each_entry(backref, &rec->backrefs, list) {
3224                                 if (!backref->reachable)
3225                                         continue;
3226
3227                                 ref_root = get_root_rec(root_cache,
3228                                                         backref->ref_root);
3229                                 if (ref_root->found_ref > 0)
3230                                         continue;
3231
3232                                 backref->reachable = 0;
3233                                 rec->found_ref--;
3234                                 if (rec->found_ref == 0)
3235                                         loop = 1;
3236                         }
3237                 }
3238         }
3239
3240         cache = search_cache_extent(root_cache, 0);
3241         while (1) {
3242                 if (!cache)
3243                         break;
3244                 rec = container_of(cache, struct root_record, cache);
3245                 cache = next_cache_extent(cache);
3246
3247                 if (rec->found_ref == 0 &&
3248                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3249                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3250                         ret = check_orphan_item(root->fs_info->tree_root,
3251                                                 rec->objectid);
3252                         if (ret == 0)
3253                                 continue;
3254
3255                         /*
3256                          * If we don't have a root item then we likely just have
3257                          * a dir item in a snapshot for this root but no actual
3258                          * ref key or anything so it's meaningless.
3259                          */
3260                         if (!rec->found_root_item)
3261                                 continue;
3262                         errors++;
3263                         fprintf(stderr, "fs tree %llu not referenced\n",
3264                                 (unsigned long long)rec->objectid);
3265                 }
3266
3267                 error = 0;
3268                 if (rec->found_ref > 0 && !rec->found_root_item)
3269                         error = 1;
3270                 list_for_each_entry(backref, &rec->backrefs, list) {
3271                         if (!backref->found_dir_item)
3272                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3273                         if (!backref->found_dir_index)
3274                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3275                         if (!backref->found_back_ref)
3276                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3277                         if (!backref->found_forward_ref)
3278                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3279                         if (backref->reachable && backref->errors)
3280                                 error = 1;
3281                 }
3282                 if (!error)
3283                         continue;
3284
3285                 errors++;
3286                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3287                         (unsigned long long)rec->objectid, rec->found_ref,
3288                          rec->found_root_item ? "" : "not found");
3289
3290                 list_for_each_entry(backref, &rec->backrefs, list) {
3291                         if (!backref->reachable)
3292                                 continue;
3293                         if (!backref->errors && rec->found_root_item)
3294                                 continue;
3295                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3296                                 " index %llu namelen %u name %s errors %x\n",
3297                                 (unsigned long long)backref->ref_root,
3298                                 (unsigned long long)backref->dir,
3299                                 (unsigned long long)backref->index,
3300                                 backref->namelen, backref->name,
3301                                 backref->errors);
3302                         print_ref_error(backref->errors);
3303                 }
3304         }
3305         return errors > 0 ? 1 : 0;
3306 }
3307
3308 static int process_root_ref(struct extent_buffer *eb, int slot,
3309                             struct btrfs_key *key,
3310                             struct cache_tree *root_cache)
3311 {
3312         u64 dirid;
3313         u64 index;
3314         u32 len;
3315         u32 name_len;
3316         struct btrfs_root_ref *ref;
3317         char namebuf[BTRFS_NAME_LEN];
3318         int error;
3319
3320         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3321
3322         dirid = btrfs_root_ref_dirid(eb, ref);
3323         index = btrfs_root_ref_sequence(eb, ref);
3324         name_len = btrfs_root_ref_name_len(eb, ref);
3325
3326         if (name_len <= BTRFS_NAME_LEN) {
3327                 len = name_len;
3328                 error = 0;
3329         } else {
3330                 len = BTRFS_NAME_LEN;
3331                 error = REF_ERR_NAME_TOO_LONG;
3332         }
3333         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3334
3335         if (key->type == BTRFS_ROOT_REF_KEY) {
3336                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3337                                  index, namebuf, len, key->type, error);
3338         } else {
3339                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3340                                  index, namebuf, len, key->type, error);
3341         }
3342         return 0;
3343 }
3344
3345 static void free_corrupt_block(struct cache_extent *cache)
3346 {
3347         struct btrfs_corrupt_block *corrupt;
3348
3349         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3350         free(corrupt);
3351 }
3352
3353 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3354
3355 /*
3356  * Repair the btree of the given root.
3357  *
3358  * The fix is to remove the node key in corrupt_blocks cache_tree.
3359  * and rebalance the tree.
3360  * After the fix, the btree should be writeable.
3361  */
3362 static int repair_btree(struct btrfs_root *root,
3363                         struct cache_tree *corrupt_blocks)
3364 {
3365         struct btrfs_trans_handle *trans;
3366         struct btrfs_path *path;
3367         struct btrfs_corrupt_block *corrupt;
3368         struct cache_extent *cache;
3369         struct btrfs_key key;
3370         u64 offset;
3371         int level;
3372         int ret = 0;
3373
3374         if (cache_tree_empty(corrupt_blocks))
3375                 return 0;
3376
3377         path = btrfs_alloc_path();
3378         if (!path)
3379                 return -ENOMEM;
3380
3381         trans = btrfs_start_transaction(root, 1);
3382         if (IS_ERR(trans)) {
3383                 ret = PTR_ERR(trans);
3384                 fprintf(stderr, "Error starting transaction: %s\n",
3385                         strerror(-ret));
3386                 goto out_free_path;
3387         }
3388         cache = first_cache_extent(corrupt_blocks);
3389         while (cache) {
3390                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3391                                        cache);
3392                 level = corrupt->level;
3393                 path->lowest_level = level;
3394                 key.objectid = corrupt->key.objectid;
3395                 key.type = corrupt->key.type;
3396                 key.offset = corrupt->key.offset;
3397
3398                 /*
3399                  * Here we don't want to do any tree balance, since it may
3400                  * cause a balance with corrupted brother leaf/node,
3401                  * so ins_len set to 0 here.
3402                  * Balance will be done after all corrupt node/leaf is deleted.
3403                  */
3404                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3405                 if (ret < 0)
3406                         goto out;
3407                 offset = btrfs_node_blockptr(path->nodes[level],
3408                                              path->slots[level]);
3409
3410                 /* Remove the ptr */
3411                 ret = btrfs_del_ptr(trans, root, path, level,
3412                                     path->slots[level]);
3413                 if (ret < 0)
3414                         goto out;
3415                 /*
3416                  * Remove the corresponding extent
3417                  * return value is not concerned.
3418                  */
3419                 btrfs_release_path(path);
3420                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3421                                         0, root->root_key.objectid,
3422                                         level - 1, 0);
3423                 cache = next_cache_extent(cache);
3424         }
3425
3426         /* Balance the btree using btrfs_search_slot() */
3427         cache = first_cache_extent(corrupt_blocks);
3428         while (cache) {
3429                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3430                                        cache);
3431                 memcpy(&key, &corrupt->key, sizeof(key));
3432                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3433                 if (ret < 0)
3434                         goto out;
3435                 /* return will always >0 since it won't find the item */
3436                 ret = 0;
3437                 btrfs_release_path(path);
3438                 cache = next_cache_extent(cache);
3439         }
3440 out:
3441         btrfs_commit_transaction(trans, root);
3442 out_free_path:
3443         btrfs_free_path(path);
3444         return ret;
3445 }
3446
3447 static int check_fs_root(struct btrfs_root *root,
3448                          struct cache_tree *root_cache,
3449                          struct walk_control *wc)
3450 {
3451         int ret = 0;
3452         int err = 0;
3453         int wret;
3454         int level;
3455         struct btrfs_path path;
3456         struct shared_node root_node;
3457         struct root_record *rec;
3458         struct btrfs_root_item *root_item = &root->root_item;
3459         struct cache_tree corrupt_blocks;
3460         struct orphan_data_extent *orphan;
3461         struct orphan_data_extent *tmp;
3462         enum btrfs_tree_block_status status;
3463
3464         /*
3465          * Reuse the corrupt_block cache tree to record corrupted tree block
3466          *
3467          * Unlike the usage in extent tree check, here we do it in a per
3468          * fs/subvol tree base.
3469          */
3470         cache_tree_init(&corrupt_blocks);
3471         root->fs_info->corrupt_blocks = &corrupt_blocks;
3472
3473         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3474                 rec = get_root_rec(root_cache, root->root_key.objectid);
3475                 if (btrfs_root_refs(root_item) > 0)
3476                         rec->found_root_item = 1;
3477         }
3478
3479         btrfs_init_path(&path);
3480         memset(&root_node, 0, sizeof(root_node));
3481         cache_tree_init(&root_node.root_cache);
3482         cache_tree_init(&root_node.inode_cache);
3483
3484         /* Move the orphan extent record to corresponding inode_record */
3485         list_for_each_entry_safe(orphan, tmp,
3486                                  &root->orphan_data_extents, list) {
3487                 struct inode_record *inode;
3488
3489                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3490                                       1);
3491                 BUG_ON(IS_ERR(inode));
3492                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3493                 list_move(&orphan->list, &inode->orphan_extents);
3494         }
3495
3496         level = btrfs_header_level(root->node);
3497         memset(wc->nodes, 0, sizeof(wc->nodes));
3498         wc->nodes[level] = &root_node;
3499         wc->active_node = level;
3500         wc->root_level = level;
3501
3502         /* We may not have checked the root block, lets do that now */
3503         if (btrfs_is_leaf(root->node))
3504                 status = btrfs_check_leaf(root, NULL, root->node);
3505         else
3506                 status = btrfs_check_node(root, NULL, root->node);
3507         if (status != BTRFS_TREE_BLOCK_CLEAN)
3508                 return -EIO;
3509
3510         if (btrfs_root_refs(root_item) > 0 ||
3511             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3512                 path.nodes[level] = root->node;
3513                 extent_buffer_get(root->node);
3514                 path.slots[level] = 0;
3515         } else {
3516                 struct btrfs_key key;
3517                 struct btrfs_disk_key found_key;
3518
3519                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3520                 level = root_item->drop_level;
3521                 path.lowest_level = level;
3522                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3523                 if (wret < 0)
3524                         goto skip_walking;
3525                 btrfs_node_key(path.nodes[level], &found_key,
3526                                 path.slots[level]);
3527                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3528                                         sizeof(found_key)));
3529         }
3530
3531         while (1) {
3532                 wret = walk_down_tree(root, &path, wc, &level);
3533                 if (wret < 0)
3534                         ret = wret;
3535                 if (wret != 0)
3536                         break;
3537
3538                 wret = walk_up_tree(root, &path, wc, &level);
3539                 if (wret < 0)
3540                         ret = wret;
3541                 if (wret != 0)
3542                         break;
3543         }
3544 skip_walking:
3545         btrfs_release_path(&path);
3546
3547         if (!cache_tree_empty(&corrupt_blocks)) {
3548                 struct cache_extent *cache;
3549                 struct btrfs_corrupt_block *corrupt;
3550
3551                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3552                        root->root_key.objectid);
3553                 cache = first_cache_extent(&corrupt_blocks);
3554                 while (cache) {
3555                         corrupt = container_of(cache,
3556                                                struct btrfs_corrupt_block,
3557                                                cache);
3558                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3559                                cache->start, corrupt->level,
3560                                corrupt->key.objectid, corrupt->key.type,
3561                                corrupt->key.offset);
3562                         cache = next_cache_extent(cache);
3563                 }
3564                 if (repair) {
3565                         printf("Try to repair the btree for root %llu\n",
3566                                root->root_key.objectid);
3567                         ret = repair_btree(root, &corrupt_blocks);
3568                         if (ret < 0)
3569                                 fprintf(stderr, "Failed to repair btree: %s\n",
3570                                         strerror(-ret));
3571                         if (!ret)
3572                                 printf("Btree for root %llu is fixed\n",
3573                                        root->root_key.objectid);
3574                 }
3575         }
3576
3577         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3578         if (err < 0)
3579                 ret = err;
3580
3581         if (root_node.current) {
3582                 root_node.current->checked = 1;
3583                 maybe_free_inode_rec(&root_node.inode_cache,
3584                                 root_node.current);
3585         }
3586
3587         err = check_inode_recs(root, &root_node.inode_cache);
3588         if (!ret)
3589                 ret = err;
3590
3591         free_corrupt_blocks_tree(&corrupt_blocks);
3592         root->fs_info->corrupt_blocks = NULL;
3593         free_orphan_data_extents(&root->orphan_data_extents);
3594         return ret;
3595 }
3596
3597 static int fs_root_objectid(u64 objectid)
3598 {
3599         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3600             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3601                 return 1;
3602         return is_fstree(objectid);
3603 }
3604
3605 static int check_fs_roots(struct btrfs_root *root,
3606                           struct cache_tree *root_cache)
3607 {
3608         struct btrfs_path path;
3609         struct btrfs_key key;
3610         struct walk_control wc;
3611         struct extent_buffer *leaf, *tree_node;
3612         struct btrfs_root *tmp_root;
3613         struct btrfs_root *tree_root = root->fs_info->tree_root;
3614         int ret;
3615         int err = 0;
3616
3617         if (ctx.progress_enabled) {
3618                 ctx.tp = TASK_FS_ROOTS;
3619                 task_start(ctx.info);
3620         }
3621
3622         /*
3623          * Just in case we made any changes to the extent tree that weren't
3624          * reflected into the free space cache yet.
3625          */
3626         if (repair)
3627                 reset_cached_block_groups(root->fs_info);
3628         memset(&wc, 0, sizeof(wc));
3629         cache_tree_init(&wc.shared);
3630         btrfs_init_path(&path);
3631
3632 again:
3633         key.offset = 0;
3634         key.objectid = 0;
3635         key.type = BTRFS_ROOT_ITEM_KEY;
3636         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3637         if (ret < 0) {
3638                 err = 1;
3639                 goto out;
3640         }
3641         tree_node = tree_root->node;
3642         while (1) {
3643                 if (tree_node != tree_root->node) {
3644                         free_root_recs_tree(root_cache);
3645                         btrfs_release_path(&path);
3646                         goto again;
3647                 }
3648                 leaf = path.nodes[0];
3649                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3650                         ret = btrfs_next_leaf(tree_root, &path);
3651                         if (ret) {
3652                                 if (ret < 0)
3653                                         err = 1;
3654                                 break;
3655                         }
3656                         leaf = path.nodes[0];
3657                 }
3658                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3659                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3660                     fs_root_objectid(key.objectid)) {
3661                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3662                                 tmp_root = btrfs_read_fs_root_no_cache(
3663                                                 root->fs_info, &key);
3664                         } else {
3665                                 key.offset = (u64)-1;
3666                                 tmp_root = btrfs_read_fs_root(
3667                                                 root->fs_info, &key);
3668                         }
3669                         if (IS_ERR(tmp_root)) {
3670                                 err = 1;
3671                                 goto next;
3672                         }
3673                         ret = check_fs_root(tmp_root, root_cache, &wc);
3674                         if (ret == -EAGAIN) {
3675                                 free_root_recs_tree(root_cache);
3676                                 btrfs_release_path(&path);
3677                                 goto again;
3678                         }
3679                         if (ret)
3680                                 err = 1;
3681                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3682                                 btrfs_free_fs_root(tmp_root);
3683                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3684                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3685                         process_root_ref(leaf, path.slots[0], &key,
3686                                          root_cache);
3687                 }
3688 next:
3689                 path.slots[0]++;
3690         }
3691 out:
3692         btrfs_release_path(&path);
3693         if (err)
3694                 free_extent_cache_tree(&wc.shared);
3695         if (!cache_tree_empty(&wc.shared))
3696                 fprintf(stderr, "warning line %d\n", __LINE__);
3697
3698         task_stop(ctx.info);
3699
3700         return err;
3701 }
3702
3703 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3704 {
3705         struct list_head *cur = rec->backrefs.next;
3706         struct extent_backref *back;
3707         struct tree_backref *tback;
3708         struct data_backref *dback;
3709         u64 found = 0;
3710         int err = 0;
3711
3712         while(cur != &rec->backrefs) {
3713                 back = list_entry(cur, struct extent_backref, list);
3714                 cur = cur->next;
3715                 if (!back->found_extent_tree) {
3716                         err = 1;
3717                         if (!print_errs)
3718                                 goto out;
3719                         if (back->is_data) {
3720                                 dback = (struct data_backref *)back;
3721                                 fprintf(stderr, "Backref %llu %s %llu"
3722                                         " owner %llu offset %llu num_refs %lu"
3723                                         " not found in extent tree\n",
3724                                         (unsigned long long)rec->start,
3725                                         back->full_backref ?
3726                                         "parent" : "root",
3727                                         back->full_backref ?
3728                                         (unsigned long long)dback->parent:
3729                                         (unsigned long long)dback->root,
3730                                         (unsigned long long)dback->owner,
3731                                         (unsigned long long)dback->offset,
3732                                         (unsigned long)dback->num_refs);
3733                         } else {
3734                                 tback = (struct tree_backref *)back;
3735                                 fprintf(stderr, "Backref %llu parent %llu"
3736                                         " root %llu not found in extent tree\n",
3737                                         (unsigned long long)rec->start,
3738                                         (unsigned long long)tback->parent,
3739                                         (unsigned long long)tback->root);
3740                         }
3741                 }
3742                 if (!back->is_data && !back->found_ref) {
3743                         err = 1;
3744                         if (!print_errs)
3745                                 goto out;
3746                         tback = (struct tree_backref *)back;
3747                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3748                                 (unsigned long long)rec->start,
3749                                 back->full_backref ? "parent" : "root",
3750                                 back->full_backref ?
3751                                 (unsigned long long)tback->parent :
3752                                 (unsigned long long)tback->root, back);
3753                 }
3754                 if (back->is_data) {
3755                         dback = (struct data_backref *)back;
3756                         if (dback->found_ref != dback->num_refs) {
3757                                 err = 1;
3758                                 if (!print_errs)
3759                                         goto out;
3760                                 fprintf(stderr, "Incorrect local backref count"
3761                                         " on %llu %s %llu owner %llu"
3762                                         " offset %llu found %u wanted %u back %p\n",
3763                                         (unsigned long long)rec->start,
3764                                         back->full_backref ?
3765                                         "parent" : "root",
3766                                         back->full_backref ?
3767                                         (unsigned long long)dback->parent:
3768                                         (unsigned long long)dback->root,
3769                                         (unsigned long long)dback->owner,
3770                                         (unsigned long long)dback->offset,
3771                                         dback->found_ref, dback->num_refs, back);
3772                         }
3773                         if (dback->disk_bytenr != rec->start) {
3774                                 err = 1;
3775                                 if (!print_errs)
3776                                         goto out;
3777                                 fprintf(stderr, "Backref disk bytenr does not"
3778                                         " match extent record, bytenr=%llu, "
3779                                         "ref bytenr=%llu\n",
3780                                         (unsigned long long)rec->start,
3781                                         (unsigned long long)dback->disk_bytenr);
3782                         }
3783
3784                         if (dback->bytes != rec->nr) {
3785                                 err = 1;
3786                                 if (!print_errs)
3787                                         goto out;
3788                                 fprintf(stderr, "Backref bytes do not match "
3789                                         "extent backref, bytenr=%llu, ref "
3790                                         "bytes=%llu, backref bytes=%llu\n",
3791                                         (unsigned long long)rec->start,
3792                                         (unsigned long long)rec->nr,
3793                                         (unsigned long long)dback->bytes);
3794                         }
3795                 }
3796                 if (!back->is_data) {
3797                         found += 1;
3798                 } else {
3799                         dback = (struct data_backref *)back;
3800                         found += dback->found_ref;
3801                 }
3802         }
3803         if (found != rec->refs) {
3804                 err = 1;
3805                 if (!print_errs)
3806                         goto out;
3807                 fprintf(stderr, "Incorrect global backref count "
3808                         "on %llu found %llu wanted %llu\n",
3809                         (unsigned long long)rec->start,
3810                         (unsigned long long)found,
3811                         (unsigned long long)rec->refs);
3812         }
3813 out:
3814         return err;
3815 }
3816
3817 static int free_all_extent_backrefs(struct extent_record *rec)
3818 {
3819         struct extent_backref *back;
3820         struct list_head *cur;
3821         while (!list_empty(&rec->backrefs)) {
3822                 cur = rec->backrefs.next;
3823                 back = list_entry(cur, struct extent_backref, list);
3824                 list_del(cur);
3825                 free(back);
3826         }
3827         return 0;
3828 }
3829
3830 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3831                                      struct cache_tree *extent_cache)
3832 {
3833         struct cache_extent *cache;
3834         struct extent_record *rec;
3835
3836         while (1) {
3837                 cache = first_cache_extent(extent_cache);
3838                 if (!cache)
3839                         break;
3840                 rec = container_of(cache, struct extent_record, cache);
3841                 remove_cache_extent(extent_cache, cache);
3842                 free_all_extent_backrefs(rec);
3843                 free(rec);
3844         }
3845 }
3846
3847 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3848                                  struct extent_record *rec)
3849 {
3850         if (rec->content_checked && rec->owner_ref_checked &&
3851             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3852             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3853             !rec->bad_full_backref && !rec->crossing_stripes &&
3854             !rec->wrong_chunk_type) {
3855                 remove_cache_extent(extent_cache, &rec->cache);
3856                 free_all_extent_backrefs(rec);
3857                 list_del_init(&rec->list);
3858                 free(rec);
3859         }
3860         return 0;
3861 }
3862
3863 static int check_owner_ref(struct btrfs_root *root,
3864                             struct extent_record *rec,
3865                             struct extent_buffer *buf)
3866 {
3867         struct extent_backref *node;
3868         struct tree_backref *back;
3869         struct btrfs_root *ref_root;
3870         struct btrfs_key key;
3871         struct btrfs_path path;
3872         struct extent_buffer *parent;
3873         int level;
3874         int found = 0;
3875         int ret;
3876
3877         list_for_each_entry(node, &rec->backrefs, list) {
3878                 if (node->is_data)
3879                         continue;
3880                 if (!node->found_ref)
3881                         continue;
3882                 if (node->full_backref)
3883                         continue;
3884                 back = (struct tree_backref *)node;
3885                 if (btrfs_header_owner(buf) == back->root)
3886                         return 0;
3887         }
3888         BUG_ON(rec->is_root);
3889
3890         /* try to find the block by search corresponding fs tree */
3891         key.objectid = btrfs_header_owner(buf);
3892         key.type = BTRFS_ROOT_ITEM_KEY;
3893         key.offset = (u64)-1;
3894
3895         ref_root = btrfs_read_fs_root(root->fs_info, &key);
3896         if (IS_ERR(ref_root))
3897                 return 1;
3898
3899         level = btrfs_header_level(buf);
3900         if (level == 0)
3901                 btrfs_item_key_to_cpu(buf, &key, 0);
3902         else
3903                 btrfs_node_key_to_cpu(buf, &key, 0);
3904
3905         btrfs_init_path(&path);
3906         path.lowest_level = level + 1;
3907         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3908         if (ret < 0)
3909                 return 0;
3910
3911         parent = path.nodes[level + 1];
3912         if (parent && buf->start == btrfs_node_blockptr(parent,
3913                                                         path.slots[level + 1]))
3914                 found = 1;
3915
3916         btrfs_release_path(&path);
3917         return found ? 0 : 1;
3918 }
3919
3920 static int is_extent_tree_record(struct extent_record *rec)
3921 {
3922         struct list_head *cur = rec->backrefs.next;
3923         struct extent_backref *node;
3924         struct tree_backref *back;
3925         int is_extent = 0;
3926
3927         while(cur != &rec->backrefs) {
3928                 node = list_entry(cur, struct extent_backref, list);
3929                 cur = cur->next;
3930                 if (node->is_data)
3931                         return 0;
3932                 back = (struct tree_backref *)node;
3933                 if (node->full_backref)
3934                         return 0;
3935                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3936                         is_extent = 1;
3937         }
3938         return is_extent;
3939 }
3940
3941
3942 static int record_bad_block_io(struct btrfs_fs_info *info,
3943                                struct cache_tree *extent_cache,
3944                                u64 start, u64 len)
3945 {
3946         struct extent_record *rec;
3947         struct cache_extent *cache;
3948         struct btrfs_key key;
3949
3950         cache = lookup_cache_extent(extent_cache, start, len);
3951         if (!cache)
3952                 return 0;
3953
3954         rec = container_of(cache, struct extent_record, cache);
3955         if (!is_extent_tree_record(rec))
3956                 return 0;
3957
3958         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3959         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
3960 }
3961
3962 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3963                        struct extent_buffer *buf, int slot)
3964 {
3965         if (btrfs_header_level(buf)) {
3966                 struct btrfs_key_ptr ptr1, ptr2;
3967
3968                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3969                                    sizeof(struct btrfs_key_ptr));
3970                 read_extent_buffer(buf, &ptr2,
3971                                    btrfs_node_key_ptr_offset(slot + 1),
3972                                    sizeof(struct btrfs_key_ptr));
3973                 write_extent_buffer(buf, &ptr1,
3974                                     btrfs_node_key_ptr_offset(slot + 1),
3975                                     sizeof(struct btrfs_key_ptr));
3976                 write_extent_buffer(buf, &ptr2,
3977                                     btrfs_node_key_ptr_offset(slot),
3978                                     sizeof(struct btrfs_key_ptr));
3979                 if (slot == 0) {
3980                         struct btrfs_disk_key key;
3981                         btrfs_node_key(buf, &key, 0);
3982                         btrfs_fixup_low_keys(root, path, &key,
3983                                              btrfs_header_level(buf) + 1);
3984                 }
3985         } else {
3986                 struct btrfs_item *item1, *item2;
3987                 struct btrfs_key k1, k2;
3988                 char *item1_data, *item2_data;
3989                 u32 item1_offset, item2_offset, item1_size, item2_size;
3990
3991                 item1 = btrfs_item_nr(slot);
3992                 item2 = btrfs_item_nr(slot + 1);
3993                 btrfs_item_key_to_cpu(buf, &k1, slot);
3994                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
3995                 item1_offset = btrfs_item_offset(buf, item1);
3996                 item2_offset = btrfs_item_offset(buf, item2);
3997                 item1_size = btrfs_item_size(buf, item1);
3998                 item2_size = btrfs_item_size(buf, item2);
3999
4000                 item1_data = malloc(item1_size);
4001                 if (!item1_data)
4002                         return -ENOMEM;
4003                 item2_data = malloc(item2_size);
4004                 if (!item2_data) {
4005                         free(item1_data);
4006                         return -ENOMEM;
4007                 }
4008
4009                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4010                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4011
4012                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4013                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4014                 free(item1_data);
4015                 free(item2_data);
4016
4017                 btrfs_set_item_offset(buf, item1, item2_offset);
4018                 btrfs_set_item_offset(buf, item2, item1_offset);
4019                 btrfs_set_item_size(buf, item1, item2_size);
4020                 btrfs_set_item_size(buf, item2, item1_size);
4021
4022                 path->slots[0] = slot;
4023                 btrfs_set_item_key_unsafe(root, path, &k2);
4024                 path->slots[0] = slot + 1;
4025                 btrfs_set_item_key_unsafe(root, path, &k1);
4026         }
4027         return 0;
4028 }
4029
4030 static int fix_key_order(struct btrfs_trans_handle *trans,
4031                          struct btrfs_root *root,
4032                          struct btrfs_path *path)
4033 {
4034         struct extent_buffer *buf;
4035         struct btrfs_key k1, k2;
4036         int i;
4037         int level = path->lowest_level;
4038         int ret = -EIO;
4039
4040         buf = path->nodes[level];
4041         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4042                 if (level) {
4043                         btrfs_node_key_to_cpu(buf, &k1, i);
4044                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4045                 } else {
4046                         btrfs_item_key_to_cpu(buf, &k1, i);
4047                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4048                 }
4049                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4050                         continue;
4051                 ret = swap_values(root, path, buf, i);
4052                 if (ret)
4053                         break;
4054                 btrfs_mark_buffer_dirty(buf);
4055                 i = 0;
4056         }
4057         return ret;
4058 }
4059
4060 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4061                              struct btrfs_root *root,
4062                              struct btrfs_path *path,
4063                              struct extent_buffer *buf, int slot)
4064 {
4065         struct btrfs_key key;
4066         int nritems = btrfs_header_nritems(buf);
4067
4068         btrfs_item_key_to_cpu(buf, &key, slot);
4069
4070         /* These are all the keys we can deal with missing. */
4071         if (key.type != BTRFS_DIR_INDEX_KEY &&
4072             key.type != BTRFS_EXTENT_ITEM_KEY &&
4073             key.type != BTRFS_METADATA_ITEM_KEY &&
4074             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4075             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4076                 return -1;
4077
4078         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4079                (unsigned long long)key.objectid, key.type,
4080                (unsigned long long)key.offset, slot, buf->start);
4081         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4082                               btrfs_item_nr_offset(slot + 1),
4083                               sizeof(struct btrfs_item) *
4084                               (nritems - slot - 1));
4085         btrfs_set_header_nritems(buf, nritems - 1);
4086         if (slot == 0) {
4087                 struct btrfs_disk_key disk_key;
4088
4089                 btrfs_item_key(buf, &disk_key, 0);
4090                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4091         }
4092         btrfs_mark_buffer_dirty(buf);
4093         return 0;
4094 }
4095
4096 static int fix_item_offset(struct btrfs_trans_handle *trans,
4097                            struct btrfs_root *root,
4098                            struct btrfs_path *path)
4099 {
4100         struct extent_buffer *buf;
4101         int i;
4102         int ret = 0;
4103
4104         /* We should only get this for leaves */
4105         BUG_ON(path->lowest_level);
4106         buf = path->nodes[0];
4107 again:
4108         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4109                 unsigned int shift = 0, offset;
4110
4111                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4112                     BTRFS_LEAF_DATA_SIZE(root)) {
4113                         if (btrfs_item_end_nr(buf, i) >
4114                             BTRFS_LEAF_DATA_SIZE(root)) {
4115                                 ret = delete_bogus_item(trans, root, path,
4116                                                         buf, i);
4117                                 if (!ret)
4118                                         goto again;
4119                                 fprintf(stderr, "item is off the end of the "
4120                                         "leaf, can't fix\n");
4121                                 ret = -EIO;
4122                                 break;
4123                         }
4124                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4125                                 btrfs_item_end_nr(buf, i);
4126                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4127                            btrfs_item_offset_nr(buf, i - 1)) {
4128                         if (btrfs_item_end_nr(buf, i) >
4129                             btrfs_item_offset_nr(buf, i - 1)) {
4130                                 ret = delete_bogus_item(trans, root, path,
4131                                                         buf, i);
4132                                 if (!ret)
4133                                         goto again;
4134                                 fprintf(stderr, "items overlap, can't fix\n");
4135                                 ret = -EIO;
4136                                 break;
4137                         }
4138                         shift = btrfs_item_offset_nr(buf, i - 1) -
4139                                 btrfs_item_end_nr(buf, i);
4140                 }
4141                 if (!shift)
4142                         continue;
4143
4144                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4145                        i, shift, (unsigned long long)buf->start);
4146                 offset = btrfs_item_offset_nr(buf, i);
4147                 memmove_extent_buffer(buf,
4148                                       btrfs_leaf_data(buf) + offset + shift,
4149                                       btrfs_leaf_data(buf) + offset,
4150                                       btrfs_item_size_nr(buf, i));
4151                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4152                                       offset + shift);
4153                 btrfs_mark_buffer_dirty(buf);
4154         }
4155
4156         /*
4157          * We may have moved things, in which case we want to exit so we don't
4158          * write those changes out.  Once we have proper abort functionality in
4159          * progs this can be changed to something nicer.
4160          */
4161         BUG_ON(ret);
4162         return ret;
4163 }
4164
4165 /*
4166  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4167  * then just return -EIO.
4168  */
4169 static int try_to_fix_bad_block(struct btrfs_root *root,
4170                                 struct extent_buffer *buf,
4171                                 enum btrfs_tree_block_status status)
4172 {
4173         struct btrfs_trans_handle *trans;
4174         struct ulist *roots;
4175         struct ulist_node *node;
4176         struct btrfs_root *search_root;
4177         struct btrfs_path *path;
4178         struct ulist_iterator iter;
4179         struct btrfs_key root_key, key;
4180         int ret;
4181
4182         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4183             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4184                 return -EIO;
4185
4186         path = btrfs_alloc_path();
4187         if (!path)
4188                 return -EIO;
4189
4190         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4191                                    0, &roots);
4192         if (ret) {
4193                 btrfs_free_path(path);
4194                 return -EIO;
4195         }
4196
4197         ULIST_ITER_INIT(&iter);
4198         while ((node = ulist_next(roots, &iter))) {
4199                 root_key.objectid = node->val;
4200                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4201                 root_key.offset = (u64)-1;
4202
4203                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4204                 if (IS_ERR(root)) {
4205                         ret = -EIO;
4206                         break;
4207                 }
4208
4209
4210                 trans = btrfs_start_transaction(search_root, 0);
4211                 if (IS_ERR(trans)) {
4212                         ret = PTR_ERR(trans);
4213                         break;
4214                 }
4215
4216                 path->lowest_level = btrfs_header_level(buf);
4217                 path->skip_check_block = 1;
4218                 if (path->lowest_level)
4219                         btrfs_node_key_to_cpu(buf, &key, 0);
4220                 else
4221                         btrfs_item_key_to_cpu(buf, &key, 0);
4222                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4223                 if (ret) {
4224                         ret = -EIO;
4225                         btrfs_commit_transaction(trans, search_root);
4226                         break;
4227                 }
4228                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4229                         ret = fix_key_order(trans, search_root, path);
4230                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4231                         ret = fix_item_offset(trans, search_root, path);
4232                 if (ret) {
4233                         btrfs_commit_transaction(trans, search_root);
4234                         break;
4235                 }
4236                 btrfs_release_path(path);
4237                 btrfs_commit_transaction(trans, search_root);
4238         }
4239         ulist_free(roots);
4240         btrfs_free_path(path);
4241         return ret;
4242 }
4243
4244 static int check_block(struct btrfs_root *root,
4245                        struct cache_tree *extent_cache,
4246                        struct extent_buffer *buf, u64 flags)
4247 {
4248         struct extent_record *rec;
4249         struct cache_extent *cache;
4250         struct btrfs_key key;
4251         enum btrfs_tree_block_status status;
4252         int ret = 0;
4253         int level;
4254
4255         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4256         if (!cache)
4257                 return 1;
4258         rec = container_of(cache, struct extent_record, cache);
4259         rec->generation = btrfs_header_generation(buf);
4260
4261         level = btrfs_header_level(buf);
4262         if (btrfs_header_nritems(buf) > 0) {
4263
4264                 if (level == 0)
4265                         btrfs_item_key_to_cpu(buf, &key, 0);
4266                 else
4267                         btrfs_node_key_to_cpu(buf, &key, 0);
4268
4269                 rec->info_objectid = key.objectid;
4270         }
4271         rec->info_level = level;
4272
4273         if (btrfs_is_leaf(buf))
4274                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4275         else
4276                 status = btrfs_check_node(root, &rec->parent_key, buf);
4277
4278         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4279                 if (repair)
4280                         status = try_to_fix_bad_block(root, buf, status);
4281                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4282                         ret = -EIO;
4283                         fprintf(stderr, "bad block %llu\n",
4284                                 (unsigned long long)buf->start);
4285                 } else {
4286                         /*
4287                          * Signal to callers we need to start the scan over
4288                          * again since we'll have cow'ed blocks.
4289                          */
4290                         ret = -EAGAIN;
4291                 }
4292         } else {
4293                 rec->content_checked = 1;
4294                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4295                         rec->owner_ref_checked = 1;
4296                 else {
4297                         ret = check_owner_ref(root, rec, buf);
4298                         if (!ret)
4299                                 rec->owner_ref_checked = 1;
4300                 }
4301         }
4302         if (!ret)
4303                 maybe_free_extent_rec(extent_cache, rec);
4304         return ret;
4305 }
4306
4307 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4308                                                 u64 parent, u64 root)
4309 {
4310         struct list_head *cur = rec->backrefs.next;
4311         struct extent_backref *node;
4312         struct tree_backref *back;
4313
4314         while(cur != &rec->backrefs) {
4315                 node = list_entry(cur, struct extent_backref, list);
4316                 cur = cur->next;
4317                 if (node->is_data)
4318                         continue;
4319                 back = (struct tree_backref *)node;
4320                 if (parent > 0) {
4321                         if (!node->full_backref)
4322                                 continue;
4323                         if (parent == back->parent)
4324                                 return back;
4325                 } else {
4326                         if (node->full_backref)
4327                                 continue;
4328                         if (back->root == root)
4329                                 return back;
4330                 }
4331         }
4332         return NULL;
4333 }
4334
4335 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4336                                                 u64 parent, u64 root)
4337 {
4338         struct tree_backref *ref = malloc(sizeof(*ref));
4339         memset(&ref->node, 0, sizeof(ref->node));
4340         if (parent > 0) {
4341                 ref->parent = parent;
4342                 ref->node.full_backref = 1;
4343         } else {
4344                 ref->root = root;
4345                 ref->node.full_backref = 0;
4346         }
4347         list_add_tail(&ref->node.list, &rec->backrefs);
4348
4349         return ref;
4350 }
4351
4352 static struct data_backref *find_data_backref(struct extent_record *rec,
4353                                                 u64 parent, u64 root,
4354                                                 u64 owner, u64 offset,
4355                                                 int found_ref,
4356                                                 u64 disk_bytenr, u64 bytes)
4357 {
4358         struct list_head *cur = rec->backrefs.next;
4359         struct extent_backref *node;
4360         struct data_backref *back;
4361
4362         while(cur != &rec->backrefs) {
4363                 node = list_entry(cur, struct extent_backref, list);
4364                 cur = cur->next;
4365                 if (!node->is_data)
4366                         continue;
4367                 back = (struct data_backref *)node;
4368                 if (parent > 0) {
4369                         if (!node->full_backref)
4370                                 continue;
4371                         if (parent == back->parent)
4372                                 return back;
4373                 } else {
4374                         if (node->full_backref)
4375                                 continue;
4376                         if (back->root == root && back->owner == owner &&
4377                             back->offset == offset) {
4378                                 if (found_ref && node->found_ref &&
4379                                     (back->bytes != bytes ||
4380                                     back->disk_bytenr != disk_bytenr))
4381                                         continue;
4382                                 return back;
4383                         }
4384                 }
4385         }
4386         return NULL;
4387 }
4388
4389 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4390                                                 u64 parent, u64 root,
4391                                                 u64 owner, u64 offset,
4392                                                 u64 max_size)
4393 {
4394         struct data_backref *ref = malloc(sizeof(*ref));
4395         memset(&ref->node, 0, sizeof(ref->node));
4396         ref->node.is_data = 1;
4397
4398         if (parent > 0) {
4399                 ref->parent = parent;
4400                 ref->owner = 0;
4401                 ref->offset = 0;
4402                 ref->node.full_backref = 1;
4403         } else {
4404                 ref->root = root;
4405                 ref->owner = owner;
4406                 ref->offset = offset;
4407                 ref->node.full_backref = 0;
4408         }
4409         ref->bytes = max_size;
4410         ref->found_ref = 0;
4411         ref->num_refs = 0;
4412         list_add_tail(&ref->node.list, &rec->backrefs);
4413         if (max_size > rec->max_size)
4414                 rec->max_size = max_size;
4415         return ref;
4416 }
4417
4418 /* Check if the type of extent matches with its chunk */
4419 static void check_extent_type(struct extent_record *rec)
4420 {
4421         struct btrfs_block_group_cache *bg_cache;
4422
4423         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4424         if (!bg_cache)
4425                 return;
4426
4427         /* data extent, check chunk directly*/
4428         if (!rec->metadata) {
4429                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4430                         rec->wrong_chunk_type = 1;
4431                 return;
4432         }
4433
4434         /* metadata extent, check the obvious case first */
4435         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4436                                  BTRFS_BLOCK_GROUP_METADATA))) {
4437                 rec->wrong_chunk_type = 1;
4438                 return;
4439         }
4440
4441         /*
4442          * Check SYSTEM extent, as it's also marked as metadata, we can only
4443          * make sure it's a SYSTEM extent by its backref
4444          */
4445         if (!list_empty(&rec->backrefs)) {
4446                 struct extent_backref *node;
4447                 struct tree_backref *tback;
4448                 u64 bg_type;
4449
4450                 node = list_entry(rec->backrefs.next, struct extent_backref,
4451                                   list);
4452                 if (node->is_data) {
4453                         /* tree block shouldn't have data backref */
4454                         rec->wrong_chunk_type = 1;
4455                         return;
4456                 }
4457                 tback = container_of(node, struct tree_backref, node);
4458
4459                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4460                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4461                 else
4462                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4463                 if (!(bg_cache->flags & bg_type))
4464                         rec->wrong_chunk_type = 1;
4465         }
4466 }
4467
4468 static int add_extent_rec(struct cache_tree *extent_cache,
4469                           struct btrfs_key *parent_key, u64 parent_gen,
4470                           u64 start, u64 nr, u64 extent_item_refs,
4471                           int is_root, int inc_ref, int set_checked,
4472                           int metadata, int extent_rec, u64 max_size)
4473 {
4474         struct extent_record *rec;
4475         struct cache_extent *cache;
4476         int ret = 0;
4477         int dup = 0;
4478
4479         cache = lookup_cache_extent(extent_cache, start, nr);
4480         if (cache) {
4481                 rec = container_of(cache, struct extent_record, cache);
4482                 if (inc_ref)
4483                         rec->refs++;
4484                 if (rec->nr == 1)
4485                         rec->nr = max(nr, max_size);
4486
4487                 /*
4488                  * We need to make sure to reset nr to whatever the extent
4489                  * record says was the real size, this way we can compare it to
4490                  * the backrefs.
4491                  */
4492                 if (extent_rec) {
4493                         if (start != rec->start || rec->found_rec) {
4494                                 struct extent_record *tmp;
4495
4496                                 dup = 1;
4497                                 if (list_empty(&rec->list))
4498                                         list_add_tail(&rec->list,
4499                                                       &duplicate_extents);
4500
4501                                 /*
4502                                  * We have to do this song and dance in case we
4503                                  * find an extent record that falls inside of
4504                                  * our current extent record but does not have
4505                                  * the same objectid.
4506                                  */
4507                                 tmp = malloc(sizeof(*tmp));
4508                                 if (!tmp)
4509                                         return -ENOMEM;
4510                                 tmp->start = start;
4511                                 tmp->max_size = max_size;
4512                                 tmp->nr = nr;
4513                                 tmp->found_rec = 1;
4514                                 tmp->metadata = metadata;
4515                                 tmp->extent_item_refs = extent_item_refs;
4516                                 INIT_LIST_HEAD(&tmp->list);
4517                                 list_add_tail(&tmp->list, &rec->dups);
4518                                 rec->num_duplicates++;
4519                         } else {
4520                                 rec->nr = nr;
4521                                 rec->found_rec = 1;
4522                         }
4523                 }
4524
4525                 if (extent_item_refs && !dup) {
4526                         if (rec->extent_item_refs) {
4527                                 fprintf(stderr, "block %llu rec "
4528                                         "extent_item_refs %llu, passed %llu\n",
4529                                         (unsigned long long)start,
4530                                         (unsigned long long)
4531                                                         rec->extent_item_refs,
4532                                         (unsigned long long)extent_item_refs);
4533                         }
4534                         rec->extent_item_refs = extent_item_refs;
4535                 }
4536                 if (is_root)
4537                         rec->is_root = 1;
4538                 if (set_checked) {
4539                         rec->content_checked = 1;
4540                         rec->owner_ref_checked = 1;
4541                 }
4542
4543                 if (parent_key)
4544                         btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4545                 if (parent_gen)
4546                         rec->parent_generation = parent_gen;
4547
4548                 if (rec->max_size < max_size)
4549                         rec->max_size = max_size;
4550
4551                 /*
4552                  * A metadata extent can't cross stripe_len boundary, otherwise
4553                  * kernel scrub won't be able to handle it.
4554                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4555                  * it.
4556                  */
4557                 if (metadata && check_crossing_stripes(rec->start,
4558                                                        rec->max_size))
4559                                 rec->crossing_stripes = 1;
4560                 check_extent_type(rec);
4561                 maybe_free_extent_rec(extent_cache, rec);
4562                 return ret;
4563         }
4564         rec = malloc(sizeof(*rec));
4565         rec->start = start;
4566         rec->max_size = max_size;
4567         rec->nr = max(nr, max_size);
4568         rec->found_rec = !!extent_rec;
4569         rec->content_checked = 0;
4570         rec->owner_ref_checked = 0;
4571         rec->num_duplicates = 0;
4572         rec->metadata = metadata;
4573         rec->flag_block_full_backref = -1;
4574         rec->bad_full_backref = 0;
4575         rec->crossing_stripes = 0;
4576         rec->wrong_chunk_type = 0;
4577         INIT_LIST_HEAD(&rec->backrefs);
4578         INIT_LIST_HEAD(&rec->dups);
4579         INIT_LIST_HEAD(&rec->list);
4580
4581         if (is_root)
4582                 rec->is_root = 1;
4583         else
4584                 rec->is_root = 0;
4585
4586         if (inc_ref)
4587                 rec->refs = 1;
4588         else
4589                 rec->refs = 0;
4590
4591         if (extent_item_refs)
4592                 rec->extent_item_refs = extent_item_refs;
4593         else
4594                 rec->extent_item_refs = 0;
4595
4596         if (parent_key)
4597                 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4598         else
4599                 memset(&rec->parent_key, 0, sizeof(*parent_key));
4600
4601         if (parent_gen)
4602                 rec->parent_generation = parent_gen;
4603         else
4604                 rec->parent_generation = 0;
4605
4606         rec->cache.start = start;
4607         rec->cache.size = nr;
4608         ret = insert_cache_extent(extent_cache, &rec->cache);
4609         BUG_ON(ret);
4610         bytes_used += nr;
4611         if (set_checked) {
4612                 rec->content_checked = 1;
4613                 rec->owner_ref_checked = 1;
4614         }
4615
4616         if (metadata)
4617                 if (check_crossing_stripes(rec->start, rec->max_size))
4618                         rec->crossing_stripes = 1;
4619         check_extent_type(rec);
4620         return ret;
4621 }
4622
4623 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4624                             u64 parent, u64 root, int found_ref)
4625 {
4626         struct extent_record *rec;
4627         struct tree_backref *back;
4628         struct cache_extent *cache;
4629
4630         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4631         if (!cache) {
4632                 add_extent_rec(extent_cache, NULL, 0, bytenr,
4633                                1, 0, 0, 0, 0, 1, 0, 0);
4634                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4635                 if (!cache)
4636                         abort();
4637         }
4638
4639         rec = container_of(cache, struct extent_record, cache);
4640         if (rec->start != bytenr) {
4641                 abort();
4642         }
4643
4644         back = find_tree_backref(rec, parent, root);
4645         if (!back)
4646                 back = alloc_tree_backref(rec, parent, root);
4647
4648         if (found_ref) {
4649                 if (back->node.found_ref) {
4650                         fprintf(stderr, "Extent back ref already exists "
4651                                 "for %llu parent %llu root %llu \n",
4652                                 (unsigned long long)bytenr,
4653                                 (unsigned long long)parent,
4654                                 (unsigned long long)root);
4655                 }
4656                 back->node.found_ref = 1;
4657         } else {
4658                 if (back->node.found_extent_tree) {
4659                         fprintf(stderr, "Extent back ref already exists "
4660                                 "for %llu parent %llu root %llu \n",
4661                                 (unsigned long long)bytenr,
4662                                 (unsigned long long)parent,
4663                                 (unsigned long long)root);
4664                 }
4665                 back->node.found_extent_tree = 1;
4666         }
4667         check_extent_type(rec);
4668         maybe_free_extent_rec(extent_cache, rec);
4669         return 0;
4670 }
4671
4672 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4673                             u64 parent, u64 root, u64 owner, u64 offset,
4674                             u32 num_refs, int found_ref, u64 max_size)
4675 {
4676         struct extent_record *rec;
4677         struct data_backref *back;
4678         struct cache_extent *cache;
4679
4680         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4681         if (!cache) {
4682                 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
4683                                0, 0, max_size);
4684                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4685                 if (!cache)
4686                         abort();
4687         }
4688
4689         rec = container_of(cache, struct extent_record, cache);
4690         if (rec->max_size < max_size)
4691                 rec->max_size = max_size;
4692
4693         /*
4694          * If found_ref is set then max_size is the real size and must match the
4695          * existing refs.  So if we have already found a ref then we need to
4696          * make sure that this ref matches the existing one, otherwise we need
4697          * to add a new backref so we can notice that the backrefs don't match
4698          * and we need to figure out who is telling the truth.  This is to
4699          * account for that awful fsync bug I introduced where we'd end up with
4700          * a btrfs_file_extent_item that would have its length include multiple
4701          * prealloc extents or point inside of a prealloc extent.
4702          */
4703         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4704                                  bytenr, max_size);
4705         if (!back)
4706                 back = alloc_data_backref(rec, parent, root, owner, offset,
4707                                           max_size);
4708
4709         if (found_ref) {
4710                 BUG_ON(num_refs != 1);
4711                 if (back->node.found_ref)
4712                         BUG_ON(back->bytes != max_size);
4713                 back->node.found_ref = 1;
4714                 back->found_ref += 1;
4715                 back->bytes = max_size;
4716                 back->disk_bytenr = bytenr;
4717                 rec->refs += 1;
4718                 rec->content_checked = 1;
4719                 rec->owner_ref_checked = 1;
4720         } else {
4721                 if (back->node.found_extent_tree) {
4722                         fprintf(stderr, "Extent back ref already exists "
4723                                 "for %llu parent %llu root %llu "
4724                                 "owner %llu offset %llu num_refs %lu\n",
4725                                 (unsigned long long)bytenr,
4726                                 (unsigned long long)parent,
4727                                 (unsigned long long)root,
4728                                 (unsigned long long)owner,
4729                                 (unsigned long long)offset,
4730                                 (unsigned long)num_refs);
4731                 }
4732                 back->num_refs = num_refs;
4733                 back->node.found_extent_tree = 1;
4734         }
4735         maybe_free_extent_rec(extent_cache, rec);
4736         return 0;
4737 }
4738
4739 static int add_pending(struct cache_tree *pending,
4740                        struct cache_tree *seen, u64 bytenr, u32 size)
4741 {
4742         int ret;
4743         ret = add_cache_extent(seen, bytenr, size);
4744         if (ret)
4745                 return ret;
4746         add_cache_extent(pending, bytenr, size);
4747         return 0;
4748 }
4749
4750 static int pick_next_pending(struct cache_tree *pending,
4751                         struct cache_tree *reada,
4752                         struct cache_tree *nodes,
4753                         u64 last, struct block_info *bits, int bits_nr,
4754                         int *reada_bits)
4755 {
4756         unsigned long node_start = last;
4757         struct cache_extent *cache;
4758         int ret;
4759
4760         cache = search_cache_extent(reada, 0);
4761         if (cache) {
4762                 bits[0].start = cache->start;
4763                 bits[0].size = cache->size;
4764                 *reada_bits = 1;
4765                 return 1;
4766         }
4767         *reada_bits = 0;
4768         if (node_start > 32768)
4769                 node_start -= 32768;
4770
4771         cache = search_cache_extent(nodes, node_start);
4772         if (!cache)
4773                 cache = search_cache_extent(nodes, 0);
4774
4775         if (!cache) {
4776                  cache = search_cache_extent(pending, 0);
4777                  if (!cache)
4778                          return 0;
4779                  ret = 0;
4780                  do {
4781                          bits[ret].start = cache->start;
4782                          bits[ret].size = cache->size;
4783                          cache = next_cache_extent(cache);
4784                          ret++;
4785                  } while (cache && ret < bits_nr);
4786                  return ret;
4787         }
4788
4789         ret = 0;
4790         do {
4791                 bits[ret].start = cache->start;
4792                 bits[ret].size = cache->size;
4793                 cache = next_cache_extent(cache);
4794                 ret++;
4795         } while (cache && ret < bits_nr);
4796
4797         if (bits_nr - ret > 8) {
4798                 u64 lookup = bits[0].start + bits[0].size;
4799                 struct cache_extent *next;
4800                 next = search_cache_extent(pending, lookup);
4801                 while(next) {
4802                         if (next->start - lookup > 32768)
4803                                 break;
4804                         bits[ret].start = next->start;
4805                         bits[ret].size = next->size;
4806                         lookup = next->start + next->size;
4807                         ret++;
4808                         if (ret == bits_nr)
4809                                 break;
4810                         next = next_cache_extent(next);
4811                         if (!next)
4812                                 break;
4813                 }
4814         }
4815         return ret;
4816 }
4817
4818 static void free_chunk_record(struct cache_extent *cache)
4819 {
4820         struct chunk_record *rec;
4821
4822         rec = container_of(cache, struct chunk_record, cache);
4823         list_del_init(&rec->list);
4824         list_del_init(&rec->dextents);
4825         free(rec);
4826 }
4827
4828 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4829 {
4830         cache_tree_free_extents(chunk_cache, free_chunk_record);
4831 }
4832
4833 static void free_device_record(struct rb_node *node)
4834 {
4835         struct device_record *rec;
4836
4837         rec = container_of(node, struct device_record, node);
4838         free(rec);
4839 }
4840
4841 FREE_RB_BASED_TREE(device_cache, free_device_record);
4842
4843 int insert_block_group_record(struct block_group_tree *tree,
4844                               struct block_group_record *bg_rec)
4845 {
4846         int ret;
4847
4848         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4849         if (ret)
4850                 return ret;
4851
4852         list_add_tail(&bg_rec->list, &tree->block_groups);
4853         return 0;
4854 }
4855
4856 static void free_block_group_record(struct cache_extent *cache)
4857 {
4858         struct block_group_record *rec;
4859
4860         rec = container_of(cache, struct block_group_record, cache);
4861         list_del_init(&rec->list);
4862         free(rec);
4863 }
4864
4865 void free_block_group_tree(struct block_group_tree *tree)
4866 {
4867         cache_tree_free_extents(&tree->tree, free_block_group_record);
4868 }
4869
4870 int insert_device_extent_record(struct device_extent_tree *tree,
4871                                 struct device_extent_record *de_rec)
4872 {
4873         int ret;
4874
4875         /*
4876          * Device extent is a bit different from the other extents, because
4877          * the extents which belong to the different devices may have the
4878          * same start and size, so we need use the special extent cache
4879          * search/insert functions.
4880          */
4881         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4882         if (ret)
4883                 return ret;
4884
4885         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4886         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4887         return 0;
4888 }
4889
4890 static void free_device_extent_record(struct cache_extent *cache)
4891 {
4892         struct device_extent_record *rec;
4893
4894         rec = container_of(cache, struct device_extent_record, cache);
4895         if (!list_empty(&rec->chunk_list))
4896                 list_del_init(&rec->chunk_list);
4897         if (!list_empty(&rec->device_list))
4898                 list_del_init(&rec->device_list);
4899         free(rec);
4900 }
4901
4902 void free_device_extent_tree(struct device_extent_tree *tree)
4903 {
4904         cache_tree_free_extents(&tree->tree, free_device_extent_record);
4905 }
4906
4907 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4908 static int process_extent_ref_v0(struct cache_tree *extent_cache,
4909                                  struct extent_buffer *leaf, int slot)
4910 {
4911         struct btrfs_extent_ref_v0 *ref0;
4912         struct btrfs_key key;
4913
4914         btrfs_item_key_to_cpu(leaf, &key, slot);
4915         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
4916         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
4917                 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
4918         } else {
4919                 add_data_backref(extent_cache, key.objectid, key.offset, 0,
4920                                  0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
4921         }
4922         return 0;
4923 }
4924 #endif
4925
4926 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4927                                             struct btrfs_key *key,
4928                                             int slot)
4929 {
4930         struct btrfs_chunk *ptr;
4931         struct chunk_record *rec;
4932         int num_stripes, i;
4933
4934         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4935         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4936
4937         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4938         if (!rec) {
4939                 fprintf(stderr, "memory allocation failed\n");
4940                 exit(-1);
4941         }
4942
4943         INIT_LIST_HEAD(&rec->list);
4944         INIT_LIST_HEAD(&rec->dextents);
4945         rec->bg_rec = NULL;
4946
4947         rec->cache.start = key->offset;
4948         rec->cache.size = btrfs_chunk_length(leaf, ptr);
4949
4950         rec->generation = btrfs_header_generation(leaf);
4951
4952         rec->objectid = key->objectid;
4953         rec->type = key->type;
4954         rec->offset = key->offset;
4955
4956         rec->length = rec->cache.size;
4957         rec->owner = btrfs_chunk_owner(leaf, ptr);
4958         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4959         rec->type_flags = btrfs_chunk_type(leaf, ptr);
4960         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4961         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4962         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4963         rec->num_stripes = num_stripes;
4964         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4965
4966         for (i = 0; i < rec->num_stripes; ++i) {
4967                 rec->stripes[i].devid =
4968                         btrfs_stripe_devid_nr(leaf, ptr, i);
4969                 rec->stripes[i].offset =
4970                         btrfs_stripe_offset_nr(leaf, ptr, i);
4971                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4972                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
4973                                 BTRFS_UUID_SIZE);
4974         }
4975
4976         return rec;
4977 }
4978
4979 static int process_chunk_item(struct cache_tree *chunk_cache,
4980                               struct btrfs_key *key, struct extent_buffer *eb,
4981                               int slot)
4982 {
4983         struct chunk_record *rec;
4984         int ret = 0;
4985
4986         rec = btrfs_new_chunk_record(eb, key, slot);
4987         ret = insert_cache_extent(chunk_cache, &rec->cache);
4988         if (ret) {
4989                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
4990                         rec->offset, rec->length);
4991                 free(rec);
4992         }
4993
4994         return ret;
4995 }
4996
4997 static int process_device_item(struct rb_root *dev_cache,
4998                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
4999 {
5000         struct btrfs_dev_item *ptr;
5001         struct device_record *rec;
5002         int ret = 0;
5003
5004         ptr = btrfs_item_ptr(eb,
5005                 slot, struct btrfs_dev_item);
5006
5007         rec = malloc(sizeof(*rec));
5008         if (!rec) {
5009                 fprintf(stderr, "memory allocation failed\n");
5010                 return -ENOMEM;
5011         }
5012
5013         rec->devid = key->offset;
5014         rec->generation = btrfs_header_generation(eb);
5015
5016         rec->objectid = key->objectid;
5017         rec->type = key->type;
5018         rec->offset = key->offset;
5019
5020         rec->devid = btrfs_device_id(eb, ptr);
5021         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5022         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5023
5024         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5025         if (ret) {
5026                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5027                 free(rec);
5028         }
5029
5030         return ret;
5031 }
5032
5033 struct block_group_record *
5034 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5035                              int slot)
5036 {
5037         struct btrfs_block_group_item *ptr;
5038         struct block_group_record *rec;
5039
5040         rec = calloc(1, sizeof(*rec));
5041         if (!rec) {
5042                 fprintf(stderr, "memory allocation failed\n");
5043                 exit(-1);
5044         }
5045
5046         rec->cache.start = key->objectid;
5047         rec->cache.size = key->offset;
5048
5049         rec->generation = btrfs_header_generation(leaf);
5050
5051         rec->objectid = key->objectid;
5052         rec->type = key->type;
5053         rec->offset = key->offset;
5054
5055         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5056         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5057
5058         INIT_LIST_HEAD(&rec->list);
5059
5060         return rec;
5061 }
5062
5063 static int process_block_group_item(struct block_group_tree *block_group_cache,
5064                                     struct btrfs_key *key,
5065                                     struct extent_buffer *eb, int slot)
5066 {
5067         struct block_group_record *rec;
5068         int ret = 0;
5069
5070         rec = btrfs_new_block_group_record(eb, key, slot);
5071         ret = insert_block_group_record(block_group_cache, rec);
5072         if (ret) {
5073                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5074                         rec->objectid, rec->offset);
5075                 free(rec);
5076         }
5077
5078         return ret;
5079 }
5080
5081 struct device_extent_record *
5082 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5083                                struct btrfs_key *key, int slot)
5084 {
5085         struct device_extent_record *rec;
5086         struct btrfs_dev_extent *ptr;
5087
5088         rec = calloc(1, sizeof(*rec));
5089         if (!rec) {
5090                 fprintf(stderr, "memory allocation failed\n");
5091                 exit(-1);
5092         }
5093
5094         rec->cache.objectid = key->objectid;
5095         rec->cache.start = key->offset;
5096
5097         rec->generation = btrfs_header_generation(leaf);
5098
5099         rec->objectid = key->objectid;
5100         rec->type = key->type;
5101         rec->offset = key->offset;
5102
5103         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5104         rec->chunk_objecteid =
5105                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5106         rec->chunk_offset =
5107                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5108         rec->length = btrfs_dev_extent_length(leaf, ptr);
5109         rec->cache.size = rec->length;
5110
5111         INIT_LIST_HEAD(&rec->chunk_list);
5112         INIT_LIST_HEAD(&rec->device_list);
5113
5114         return rec;
5115 }
5116
5117 static int
5118 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5119                            struct btrfs_key *key, struct extent_buffer *eb,
5120                            int slot)
5121 {
5122         struct device_extent_record *rec;
5123         int ret;
5124
5125         rec = btrfs_new_device_extent_record(eb, key, slot);
5126         ret = insert_device_extent_record(dev_extent_cache, rec);
5127         if (ret) {
5128                 fprintf(stderr,
5129                         "Device extent[%llu, %llu, %llu] existed.\n",
5130                         rec->objectid, rec->offset, rec->length);
5131                 free(rec);
5132         }
5133
5134         return ret;
5135 }
5136
5137 static int process_extent_item(struct btrfs_root *root,
5138                                struct cache_tree *extent_cache,
5139                                struct extent_buffer *eb, int slot)
5140 {
5141         struct btrfs_extent_item *ei;
5142         struct btrfs_extent_inline_ref *iref;
5143         struct btrfs_extent_data_ref *dref;
5144         struct btrfs_shared_data_ref *sref;
5145         struct btrfs_key key;
5146         unsigned long end;
5147         unsigned long ptr;
5148         int type;
5149         u32 item_size = btrfs_item_size_nr(eb, slot);
5150         u64 refs = 0;
5151         u64 offset;
5152         u64 num_bytes;
5153         int metadata = 0;
5154
5155         btrfs_item_key_to_cpu(eb, &key, slot);
5156
5157         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5158                 metadata = 1;
5159                 num_bytes = root->leafsize;
5160         } else {
5161                 num_bytes = key.offset;
5162         }
5163
5164         if (item_size < sizeof(*ei)) {
5165 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5166                 struct btrfs_extent_item_v0 *ei0;
5167                 BUG_ON(item_size != sizeof(*ei0));
5168                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5169                 refs = btrfs_extent_refs_v0(eb, ei0);
5170 #else
5171                 BUG();
5172 #endif
5173                 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5174                                       num_bytes, refs, 0, 0, 0, metadata, 1,
5175                                       num_bytes);
5176         }
5177
5178         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5179         refs = btrfs_extent_refs(eb, ei);
5180         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5181                 metadata = 1;
5182         else
5183                 metadata = 0;
5184
5185         add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5186                        refs, 0, 0, 0, metadata, 1, num_bytes);
5187
5188         ptr = (unsigned long)(ei + 1);
5189         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5190             key.type == BTRFS_EXTENT_ITEM_KEY)
5191                 ptr += sizeof(struct btrfs_tree_block_info);
5192
5193         end = (unsigned long)ei + item_size;
5194         while (ptr < end) {
5195                 iref = (struct btrfs_extent_inline_ref *)ptr;
5196                 type = btrfs_extent_inline_ref_type(eb, iref);
5197                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5198                 switch (type) {
5199                 case BTRFS_TREE_BLOCK_REF_KEY:
5200                         add_tree_backref(extent_cache, key.objectid,
5201                                          0, offset, 0);
5202                         break;
5203                 case BTRFS_SHARED_BLOCK_REF_KEY:
5204                         add_tree_backref(extent_cache, key.objectid,
5205                                          offset, 0, 0);
5206                         break;
5207                 case BTRFS_EXTENT_DATA_REF_KEY:
5208                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5209                         add_data_backref(extent_cache, key.objectid, 0,
5210                                         btrfs_extent_data_ref_root(eb, dref),
5211                                         btrfs_extent_data_ref_objectid(eb,
5212                                                                        dref),
5213                                         btrfs_extent_data_ref_offset(eb, dref),
5214                                         btrfs_extent_data_ref_count(eb, dref),
5215                                         0, num_bytes);
5216                         break;
5217                 case BTRFS_SHARED_DATA_REF_KEY:
5218                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5219                         add_data_backref(extent_cache, key.objectid, offset,
5220                                         0, 0, 0,
5221                                         btrfs_shared_data_ref_count(eb, sref),
5222                                         0, num_bytes);
5223                         break;
5224                 default:
5225                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5226                                 key.objectid, key.type, num_bytes);
5227                         goto out;
5228                 }
5229                 ptr += btrfs_extent_inline_ref_size(type);
5230         }
5231         WARN_ON(ptr > end);
5232 out:
5233         return 0;
5234 }
5235
5236 static int check_cache_range(struct btrfs_root *root,
5237                              struct btrfs_block_group_cache *cache,
5238                              u64 offset, u64 bytes)
5239 {
5240         struct btrfs_free_space *entry;
5241         u64 *logical;
5242         u64 bytenr;
5243         int stripe_len;
5244         int i, nr, ret;
5245
5246         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5247                 bytenr = btrfs_sb_offset(i);
5248                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5249                                        cache->key.objectid, bytenr, 0,
5250                                        &logical, &nr, &stripe_len);
5251                 if (ret)
5252                         return ret;
5253
5254                 while (nr--) {
5255                         if (logical[nr] + stripe_len <= offset)
5256                                 continue;
5257                         if (offset + bytes <= logical[nr])
5258                                 continue;
5259                         if (logical[nr] == offset) {
5260                                 if (stripe_len >= bytes) {
5261                                         kfree(logical);
5262                                         return 0;
5263                                 }
5264                                 bytes -= stripe_len;
5265                                 offset += stripe_len;
5266                         } else if (logical[nr] < offset) {
5267                                 if (logical[nr] + stripe_len >=
5268                                     offset + bytes) {
5269                                         kfree(logical);
5270                                         return 0;
5271                                 }
5272                                 bytes = (offset + bytes) -
5273                                         (logical[nr] + stripe_len);
5274                                 offset = logical[nr] + stripe_len;
5275                         } else {
5276                                 /*
5277                                  * Could be tricky, the super may land in the
5278                                  * middle of the area we're checking.  First
5279                                  * check the easiest case, it's at the end.
5280                                  */
5281                                 if (logical[nr] + stripe_len >=
5282                                     bytes + offset) {
5283                                         bytes = logical[nr] - offset;
5284                                         continue;
5285                                 }
5286
5287                                 /* Check the left side */
5288                                 ret = check_cache_range(root, cache,
5289                                                         offset,
5290                                                         logical[nr] - offset);
5291                                 if (ret) {
5292                                         kfree(logical);
5293                                         return ret;
5294                                 }
5295
5296                                 /* Now we continue with the right side */
5297                                 bytes = (offset + bytes) -
5298                                         (logical[nr] + stripe_len);
5299                                 offset = logical[nr] + stripe_len;
5300                         }
5301                 }
5302
5303                 kfree(logical);
5304         }
5305
5306         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5307         if (!entry) {
5308                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5309                         offset, offset+bytes);
5310                 return -EINVAL;
5311         }
5312
5313         if (entry->offset != offset) {
5314                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5315                         entry->offset);
5316                 return -EINVAL;
5317         }
5318
5319         if (entry->bytes != bytes) {
5320                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5321                         bytes, entry->bytes, offset);
5322                 return -EINVAL;
5323         }
5324
5325         unlink_free_space(cache->free_space_ctl, entry);
5326         free(entry);
5327         return 0;
5328 }
5329
5330 static int verify_space_cache(struct btrfs_root *root,
5331                               struct btrfs_block_group_cache *cache)
5332 {
5333         struct btrfs_path *path;
5334         struct extent_buffer *leaf;
5335         struct btrfs_key key;
5336         u64 last;
5337         int ret = 0;
5338
5339         path = btrfs_alloc_path();
5340         if (!path)
5341                 return -ENOMEM;
5342
5343         root = root->fs_info->extent_root;
5344
5345         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5346
5347         key.objectid = last;
5348         key.offset = 0;
5349         key.type = BTRFS_EXTENT_ITEM_KEY;
5350
5351         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5352         if (ret < 0)
5353                 goto out;
5354         ret = 0;
5355         while (1) {
5356                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5357                         ret = btrfs_next_leaf(root, path);
5358                         if (ret < 0)
5359                                 goto out;
5360                         if (ret > 0) {
5361                                 ret = 0;
5362                                 break;
5363                         }
5364                 }
5365                 leaf = path->nodes[0];
5366                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5367                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5368                         break;
5369                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5370                     key.type != BTRFS_METADATA_ITEM_KEY) {
5371                         path->slots[0]++;
5372                         continue;
5373                 }
5374
5375                 if (last == key.objectid) {
5376                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5377                                 last = key.objectid + key.offset;
5378                         else
5379                                 last = key.objectid + root->leafsize;
5380                         path->slots[0]++;
5381                         continue;
5382                 }
5383
5384                 ret = check_cache_range(root, cache, last,
5385                                         key.objectid - last);
5386                 if (ret)
5387                         break;
5388                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5389                         last = key.objectid + key.offset;
5390                 else
5391                         last = key.objectid + root->leafsize;
5392                 path->slots[0]++;
5393         }
5394
5395         if (last < cache->key.objectid + cache->key.offset)
5396                 ret = check_cache_range(root, cache, last,
5397                                         cache->key.objectid +
5398                                         cache->key.offset - last);
5399
5400 out:
5401         btrfs_free_path(path);
5402
5403         if (!ret &&
5404             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5405                 fprintf(stderr, "There are still entries left in the space "
5406                         "cache\n");
5407                 ret = -EINVAL;
5408         }
5409
5410         return ret;
5411 }
5412
5413 static int check_space_cache(struct btrfs_root *root)
5414 {
5415         struct btrfs_block_group_cache *cache;
5416         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5417         int ret;
5418         int error = 0;
5419
5420         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5421             btrfs_super_generation(root->fs_info->super_copy) !=
5422             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5423                 printf("cache and super generation don't match, space cache "
5424                        "will be invalidated\n");
5425                 return 0;
5426         }
5427
5428         if (ctx.progress_enabled) {
5429                 ctx.tp = TASK_FREE_SPACE;
5430                 task_start(ctx.info);
5431         }
5432
5433         while (1) {
5434                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5435                 if (!cache)
5436                         break;
5437
5438                 start = cache->key.objectid + cache->key.offset;
5439                 if (!cache->free_space_ctl) {
5440                         if (btrfs_init_free_space_ctl(cache,
5441                                                       root->sectorsize)) {
5442                                 ret = -ENOMEM;
5443                                 break;
5444                         }
5445                 } else {
5446                         btrfs_remove_free_space_cache(cache);
5447                 }
5448
5449                 ret = load_free_space_cache(root->fs_info, cache);
5450                 if (!ret)
5451                         continue;
5452
5453                 ret = verify_space_cache(root, cache);
5454                 if (ret) {
5455                         fprintf(stderr, "cache appears valid but isnt %Lu\n",
5456                                 cache->key.objectid);
5457                         error++;
5458                 }
5459         }
5460
5461         task_stop(ctx.info);
5462
5463         return error ? -EINVAL : 0;
5464 }
5465
5466 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5467                         u64 num_bytes, unsigned long leaf_offset,
5468                         struct extent_buffer *eb) {
5469
5470         u64 offset = 0;
5471         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5472         char *data;
5473         unsigned long csum_offset;
5474         u32 csum;
5475         u32 csum_expected;
5476         u64 read_len;
5477         u64 data_checked = 0;
5478         u64 tmp;
5479         int ret = 0;
5480         int mirror;
5481         int num_copies;
5482
5483         if (num_bytes % root->sectorsize)
5484                 return -EINVAL;
5485
5486         data = malloc(num_bytes);
5487         if (!data)
5488                 return -ENOMEM;
5489
5490         while (offset < num_bytes) {
5491                 mirror = 0;
5492 again:
5493                 read_len = num_bytes - offset;
5494                 /* read as much space once a time */
5495                 ret = read_extent_data(root, data + offset,
5496                                 bytenr + offset, &read_len, mirror);
5497                 if (ret)
5498                         goto out;
5499                 data_checked = 0;
5500                 /* verify every 4k data's checksum */
5501                 while (data_checked < read_len) {
5502                         csum = ~(u32)0;
5503                         tmp = offset + data_checked;
5504
5505                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5506                                                csum, root->sectorsize);
5507                         btrfs_csum_final(csum, (char *)&csum);
5508
5509                         csum_offset = leaf_offset +
5510                                  tmp / root->sectorsize * csum_size;
5511                         read_extent_buffer(eb, (char *)&csum_expected,
5512                                            csum_offset, csum_size);
5513                         /* try another mirror */
5514                         if (csum != csum_expected) {
5515                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5516                                                 mirror, bytenr + tmp,
5517                                                 csum, csum_expected);
5518                                 num_copies = btrfs_num_copies(
5519                                                 &root->fs_info->mapping_tree,
5520                                                 bytenr, num_bytes);
5521                                 if (mirror < num_copies - 1) {
5522                                         mirror += 1;
5523                                         goto again;
5524                                 }
5525                         }
5526                         data_checked += root->sectorsize;
5527                 }
5528                 offset += read_len;
5529         }
5530 out:
5531         free(data);
5532         return ret;
5533 }
5534
5535 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5536                                u64 num_bytes)
5537 {
5538         struct btrfs_path *path;
5539         struct extent_buffer *leaf;
5540         struct btrfs_key key;
5541         int ret;
5542
5543         path = btrfs_alloc_path();
5544         if (!path) {
5545                 fprintf(stderr, "Error allocing path\n");
5546                 return -ENOMEM;
5547         }
5548
5549         key.objectid = bytenr;
5550         key.type = BTRFS_EXTENT_ITEM_KEY;
5551         key.offset = (u64)-1;
5552
5553 again:
5554         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5555                                 0, 0);
5556         if (ret < 0) {
5557                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5558                 btrfs_free_path(path);
5559                 return ret;
5560         } else if (ret) {
5561                 if (path->slots[0] > 0) {
5562                         path->slots[0]--;
5563                 } else {
5564                         ret = btrfs_prev_leaf(root, path);
5565                         if (ret < 0) {
5566                                 goto out;
5567                         } else if (ret > 0) {
5568                                 ret = 0;
5569                                 goto out;
5570                         }
5571                 }
5572         }
5573
5574         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5575
5576         /*
5577          * Block group items come before extent items if they have the same
5578          * bytenr, so walk back one more just in case.  Dear future traveler,
5579          * first congrats on mastering time travel.  Now if it's not too much
5580          * trouble could you go back to 2006 and tell Chris to make the
5581          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5582          * EXTENT_ITEM_KEY please?
5583          */
5584         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5585                 if (path->slots[0] > 0) {
5586                         path->slots[0]--;
5587                 } else {
5588                         ret = btrfs_prev_leaf(root, path);
5589                         if (ret < 0) {
5590                                 goto out;
5591                         } else if (ret > 0) {
5592                                 ret = 0;
5593                                 goto out;
5594                         }
5595                 }
5596                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5597         }
5598
5599         while (num_bytes) {
5600                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5601                         ret = btrfs_next_leaf(root, path);
5602                         if (ret < 0) {
5603                                 fprintf(stderr, "Error going to next leaf "
5604                                         "%d\n", ret);
5605                                 btrfs_free_path(path);
5606                                 return ret;
5607                         } else if (ret) {
5608                                 break;
5609                         }
5610                 }
5611                 leaf = path->nodes[0];
5612                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5613                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5614                         path->slots[0]++;
5615                         continue;
5616                 }
5617                 if (key.objectid + key.offset < bytenr) {
5618                         path->slots[0]++;
5619                         continue;
5620                 }
5621                 if (key.objectid > bytenr + num_bytes)
5622                         break;
5623
5624                 if (key.objectid == bytenr) {
5625                         if (key.offset >= num_bytes) {
5626                                 num_bytes = 0;
5627                                 break;
5628                         }
5629                         num_bytes -= key.offset;
5630                         bytenr += key.offset;
5631                 } else if (key.objectid < bytenr) {
5632                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5633                                 num_bytes = 0;
5634                                 break;
5635                         }
5636                         num_bytes = (bytenr + num_bytes) -
5637                                 (key.objectid + key.offset);
5638                         bytenr = key.objectid + key.offset;
5639                 } else {
5640                         if (key.objectid + key.offset < bytenr + num_bytes) {
5641                                 u64 new_start = key.objectid + key.offset;
5642                                 u64 new_bytes = bytenr + num_bytes - new_start;
5643
5644                                 /*
5645                                  * Weird case, the extent is in the middle of
5646                                  * our range, we'll have to search one side
5647                                  * and then the other.  Not sure if this happens
5648                                  * in real life, but no harm in coding it up
5649                                  * anyway just in case.
5650                                  */
5651                                 btrfs_release_path(path);
5652                                 ret = check_extent_exists(root, new_start,
5653                                                           new_bytes);
5654                                 if (ret) {
5655                                         fprintf(stderr, "Right section didn't "
5656                                                 "have a record\n");
5657                                         break;
5658                                 }
5659                                 num_bytes = key.objectid - bytenr;
5660                                 goto again;
5661                         }
5662                         num_bytes = key.objectid - bytenr;
5663                 }
5664                 path->slots[0]++;
5665         }
5666         ret = 0;
5667
5668 out:
5669         if (num_bytes && !ret) {
5670                 fprintf(stderr, "There are no extents for csum range "
5671                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5672                 ret = 1;
5673         }
5674
5675         btrfs_free_path(path);
5676         return ret;
5677 }
5678
5679 static int check_csums(struct btrfs_root *root)
5680 {
5681         struct btrfs_path *path;
5682         struct extent_buffer *leaf;
5683         struct btrfs_key key;
5684         u64 offset = 0, num_bytes = 0;
5685         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5686         int errors = 0;
5687         int ret;
5688         u64 data_len;
5689         unsigned long leaf_offset;
5690
5691         root = root->fs_info->csum_root;
5692         if (!extent_buffer_uptodate(root->node)) {
5693                 fprintf(stderr, "No valid csum tree found\n");
5694                 return -ENOENT;
5695         }
5696
5697         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5698         key.type = BTRFS_EXTENT_CSUM_KEY;
5699         key.offset = 0;
5700
5701         path = btrfs_alloc_path();
5702         if (!path)
5703                 return -ENOMEM;
5704
5705         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5706         if (ret < 0) {
5707                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5708                 btrfs_free_path(path);
5709                 return ret;
5710         }
5711
5712         if (ret > 0 && path->slots[0])
5713                 path->slots[0]--;
5714         ret = 0;
5715
5716         while (1) {
5717                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5718                         ret = btrfs_next_leaf(root, path);
5719                         if (ret < 0) {
5720                                 fprintf(stderr, "Error going to next leaf "
5721                                         "%d\n", ret);
5722                                 break;
5723                         }
5724                         if (ret)
5725                                 break;
5726                 }
5727                 leaf = path->nodes[0];
5728
5729                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5730                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5731                         path->slots[0]++;
5732                         continue;
5733                 }
5734
5735                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5736                               csum_size) * root->sectorsize;
5737                 if (!check_data_csum)
5738                         goto skip_csum_check;
5739                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5740                 ret = check_extent_csums(root, key.offset, data_len,
5741                                          leaf_offset, leaf);
5742                 if (ret)
5743                         break;
5744 skip_csum_check:
5745                 if (!num_bytes) {
5746                         offset = key.offset;
5747                 } else if (key.offset != offset + num_bytes) {
5748                         ret = check_extent_exists(root, offset, num_bytes);
5749                         if (ret) {
5750                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5751                                         "there is no extent record\n",
5752                                         offset, offset+num_bytes);
5753                                 errors++;
5754                         }
5755                         offset = key.offset;
5756                         num_bytes = 0;
5757                 }
5758                 num_bytes += data_len;
5759                 path->slots[0]++;
5760         }
5761
5762         btrfs_free_path(path);
5763         return errors;
5764 }
5765
5766 static int is_dropped_key(struct btrfs_key *key,
5767                           struct btrfs_key *drop_key) {
5768         if (key->objectid < drop_key->objectid)
5769                 return 1;
5770         else if (key->objectid == drop_key->objectid) {
5771                 if (key->type < drop_key->type)
5772                         return 1;
5773                 else if (key->type == drop_key->type) {
5774                         if (key->offset < drop_key->offset)
5775                                 return 1;
5776                 }
5777         }
5778         return 0;
5779 }
5780
5781 /*
5782  * Here are the rules for FULL_BACKREF.
5783  *
5784  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5785  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5786  *      FULL_BACKREF set.
5787  * 3) We cow'ed the block walking down a reloc tree.  This is impossible to tell
5788  *    if it happened after the relocation occurred since we'll have dropped the
5789  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5790  *    have no real way to know for sure.
5791  *
5792  * We process the blocks one root at a time, and we start from the lowest root
5793  * objectid and go to the highest.  So we can just lookup the owner backref for
5794  * the record and if we don't find it then we know it doesn't exist and we have
5795  * a FULL BACKREF.
5796  *
5797  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5798  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5799  * be set or not and then we can check later once we've gathered all the refs.
5800  */
5801 static int calc_extent_flag(struct btrfs_root *root,
5802                            struct cache_tree *extent_cache,
5803                            struct extent_buffer *buf,
5804                            struct root_item_record *ri,
5805                            u64 *flags)
5806 {
5807         struct extent_record *rec;
5808         struct cache_extent *cache;
5809         struct tree_backref *tback;
5810         u64 owner = 0;
5811
5812         cache = lookup_cache_extent(extent_cache, buf->start, 1);
5813         /* we have added this extent before */
5814         BUG_ON(!cache);
5815         rec = container_of(cache, struct extent_record, cache);
5816
5817         /*
5818          * Except file/reloc tree, we can not have
5819          * FULL BACKREF MODE
5820          */
5821         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5822                 goto normal;
5823         /*
5824          * root node
5825          */
5826         if (buf->start == ri->bytenr)
5827                 goto normal;
5828
5829         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5830                 goto full_backref;
5831
5832         owner = btrfs_header_owner(buf);
5833         if (owner == ri->objectid)
5834                 goto normal;
5835
5836         tback = find_tree_backref(rec, 0, owner);
5837         if (!tback)
5838                 goto full_backref;
5839 normal:
5840         *flags = 0;
5841         if (rec->flag_block_full_backref != -1 &&
5842             rec->flag_block_full_backref != 0)
5843                 rec->bad_full_backref = 1;
5844         return 0;
5845 full_backref:
5846         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5847         if (rec->flag_block_full_backref != -1 &&
5848             rec->flag_block_full_backref != 1)
5849                 rec->bad_full_backref = 1;
5850         return 0;
5851 }
5852
5853 static int run_next_block(struct btrfs_root *root,
5854                           struct block_info *bits,
5855                           int bits_nr,
5856                           u64 *last,
5857                           struct cache_tree *pending,
5858                           struct cache_tree *seen,
5859                           struct cache_tree *reada,
5860                           struct cache_tree *nodes,
5861                           struct cache_tree *extent_cache,
5862                           struct cache_tree *chunk_cache,
5863                           struct rb_root *dev_cache,
5864                           struct block_group_tree *block_group_cache,
5865                           struct device_extent_tree *dev_extent_cache,
5866                           struct root_item_record *ri)
5867 {
5868         struct extent_buffer *buf;
5869         struct extent_record *rec = NULL;
5870         u64 bytenr;
5871         u32 size;
5872         u64 parent;
5873         u64 owner;
5874         u64 flags;
5875         u64 ptr;
5876         u64 gen = 0;
5877         int ret = 0;
5878         int i;
5879         int nritems;
5880         struct btrfs_key key;
5881         struct cache_extent *cache;
5882         int reada_bits;
5883
5884         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5885                                     bits_nr, &reada_bits);
5886         if (nritems == 0)
5887                 return 1;
5888
5889         if (!reada_bits) {
5890                 for(i = 0; i < nritems; i++) {
5891                         ret = add_cache_extent(reada, bits[i].start,
5892                                                bits[i].size);
5893                         if (ret == -EEXIST)
5894                                 continue;
5895
5896                         /* fixme, get the parent transid */
5897                         readahead_tree_block(root, bits[i].start,
5898                                              bits[i].size, 0);
5899                 }
5900         }
5901         *last = bits[0].start;
5902         bytenr = bits[0].start;
5903         size = bits[0].size;
5904
5905         cache = lookup_cache_extent(pending, bytenr, size);
5906         if (cache) {
5907                 remove_cache_extent(pending, cache);
5908                 free(cache);
5909         }
5910         cache = lookup_cache_extent(reada, bytenr, size);
5911         if (cache) {
5912                 remove_cache_extent(reada, cache);
5913                 free(cache);
5914         }
5915         cache = lookup_cache_extent(nodes, bytenr, size);
5916         if (cache) {
5917                 remove_cache_extent(nodes, cache);
5918                 free(cache);
5919         }
5920         cache = lookup_cache_extent(extent_cache, bytenr, size);
5921         if (cache) {
5922                 rec = container_of(cache, struct extent_record, cache);
5923                 gen = rec->parent_generation;
5924         }
5925
5926         /* fixme, get the real parent transid */
5927         buf = read_tree_block(root, bytenr, size, gen);
5928         if (!extent_buffer_uptodate(buf)) {
5929                 record_bad_block_io(root->fs_info,
5930                                     extent_cache, bytenr, size);
5931                 goto out;
5932         }
5933
5934         nritems = btrfs_header_nritems(buf);
5935
5936         flags = 0;
5937         if (!init_extent_tree) {
5938                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5939                                        btrfs_header_level(buf), 1, NULL,
5940                                        &flags);
5941                 if (ret < 0) {
5942                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5943                         if (ret < 0) {
5944                                 fprintf(stderr, "Couldn't calc extent flags\n");
5945                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5946                         }
5947                 }
5948         } else {
5949                 flags = 0;
5950                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5951                 if (ret < 0) {
5952                         fprintf(stderr, "Couldn't calc extent flags\n");
5953                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5954                 }
5955         }
5956
5957         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5958                 if (ri != NULL &&
5959                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5960                     ri->objectid == btrfs_header_owner(buf)) {
5961                         /*
5962                          * Ok we got to this block from it's original owner and
5963                          * we have FULL_BACKREF set.  Relocation can leave
5964                          * converted blocks over so this is altogether possible,
5965                          * however it's not possible if the generation > the
5966                          * last snapshot, so check for this case.
5967                          */
5968                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5969                             btrfs_header_generation(buf) > ri->last_snapshot) {
5970                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5971                                 rec->bad_full_backref = 1;
5972                         }
5973                 }
5974         } else {
5975                 if (ri != NULL &&
5976                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5977                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5978                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5979                         rec->bad_full_backref = 1;
5980                 }
5981         }
5982
5983         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5984                 rec->flag_block_full_backref = 1;
5985                 parent = bytenr;
5986                 owner = 0;
5987         } else {
5988                 rec->flag_block_full_backref = 0;
5989                 parent = 0;
5990                 owner = btrfs_header_owner(buf);
5991         }
5992
5993         ret = check_block(root, extent_cache, buf, flags);
5994         if (ret)
5995                 goto out;
5996
5997         if (btrfs_is_leaf(buf)) {
5998                 btree_space_waste += btrfs_leaf_free_space(root, buf);
5999                 for (i = 0; i < nritems; i++) {
6000                         struct btrfs_file_extent_item *fi;
6001                         btrfs_item_key_to_cpu(buf, &key, i);
6002                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6003                                 process_extent_item(root, extent_cache, buf,
6004                                                     i);
6005                                 continue;
6006                         }
6007                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6008                                 process_extent_item(root, extent_cache, buf,
6009                                                     i);
6010                                 continue;
6011                         }
6012                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6013                                 total_csum_bytes +=
6014                                         btrfs_item_size_nr(buf, i);
6015                                 continue;
6016                         }
6017                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6018                                 process_chunk_item(chunk_cache, &key, buf, i);
6019                                 continue;
6020                         }
6021                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6022                                 process_device_item(dev_cache, &key, buf, i);
6023                                 continue;
6024                         }
6025                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6026                                 process_block_group_item(block_group_cache,
6027                                         &key, buf, i);
6028                                 continue;
6029                         }
6030                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6031                                 process_device_extent_item(dev_extent_cache,
6032                                         &key, buf, i);
6033                                 continue;
6034
6035                         }
6036                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6037 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6038                                 process_extent_ref_v0(extent_cache, buf, i);
6039 #else
6040                                 BUG();
6041 #endif
6042                                 continue;
6043                         }
6044
6045                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6046                                 add_tree_backref(extent_cache, key.objectid, 0,
6047                                                  key.offset, 0);
6048                                 continue;
6049                         }
6050                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6051                                 add_tree_backref(extent_cache, key.objectid,
6052                                                  key.offset, 0, 0);
6053                                 continue;
6054                         }
6055                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6056                                 struct btrfs_extent_data_ref *ref;
6057                                 ref = btrfs_item_ptr(buf, i,
6058                                                 struct btrfs_extent_data_ref);
6059                                 add_data_backref(extent_cache,
6060                                         key.objectid, 0,
6061                                         btrfs_extent_data_ref_root(buf, ref),
6062                                         btrfs_extent_data_ref_objectid(buf,
6063                                                                        ref),
6064                                         btrfs_extent_data_ref_offset(buf, ref),
6065                                         btrfs_extent_data_ref_count(buf, ref),
6066                                         0, root->sectorsize);
6067                                 continue;
6068                         }
6069                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6070                                 struct btrfs_shared_data_ref *ref;
6071                                 ref = btrfs_item_ptr(buf, i,
6072                                                 struct btrfs_shared_data_ref);
6073                                 add_data_backref(extent_cache,
6074                                         key.objectid, key.offset, 0, 0, 0,
6075                                         btrfs_shared_data_ref_count(buf, ref),
6076                                         0, root->sectorsize);
6077                                 continue;
6078                         }
6079                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6080                                 struct bad_item *bad;
6081
6082                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6083                                         continue;
6084                                 if (!owner)
6085                                         continue;
6086                                 bad = malloc(sizeof(struct bad_item));
6087                                 if (!bad)
6088                                         continue;
6089                                 INIT_LIST_HEAD(&bad->list);
6090                                 memcpy(&bad->key, &key,
6091                                        sizeof(struct btrfs_key));
6092                                 bad->root_id = owner;
6093                                 list_add_tail(&bad->list, &delete_items);
6094                                 continue;
6095                         }
6096                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6097                                 continue;
6098                         fi = btrfs_item_ptr(buf, i,
6099                                             struct btrfs_file_extent_item);
6100                         if (btrfs_file_extent_type(buf, fi) ==
6101                             BTRFS_FILE_EXTENT_INLINE)
6102                                 continue;
6103                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6104                                 continue;
6105
6106                         data_bytes_allocated +=
6107                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6108                         if (data_bytes_allocated < root->sectorsize) {
6109                                 abort();
6110                         }
6111                         data_bytes_referenced +=
6112                                 btrfs_file_extent_num_bytes(buf, fi);
6113                         add_data_backref(extent_cache,
6114                                 btrfs_file_extent_disk_bytenr(buf, fi),
6115                                 parent, owner, key.objectid, key.offset -
6116                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6117                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6118                 }
6119         } else {
6120                 int level;
6121                 struct btrfs_key first_key;
6122
6123                 first_key.objectid = 0;
6124
6125                 if (nritems > 0)
6126                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6127                 level = btrfs_header_level(buf);
6128                 for (i = 0; i < nritems; i++) {
6129                         ptr = btrfs_node_blockptr(buf, i);
6130                         size = btrfs_level_size(root, level - 1);
6131                         btrfs_node_key_to_cpu(buf, &key, i);
6132                         if (ri != NULL) {
6133                                 if ((level == ri->drop_level)
6134                                     && is_dropped_key(&key, &ri->drop_key)) {
6135                                         continue;
6136                                 }
6137                         }
6138                         ret = add_extent_rec(extent_cache, &key,
6139                                              btrfs_node_ptr_generation(buf, i),
6140                                              ptr, size, 0, 0, 1, 0, 1, 0,
6141                                              size);
6142                         BUG_ON(ret);
6143
6144                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6145
6146                         if (level > 1) {
6147                                 add_pending(nodes, seen, ptr, size);
6148                         } else {
6149                                 add_pending(pending, seen, ptr, size);
6150                         }
6151                 }
6152                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6153                                       nritems) * sizeof(struct btrfs_key_ptr);
6154         }
6155         total_btree_bytes += buf->len;
6156         if (fs_root_objectid(btrfs_header_owner(buf)))
6157                 total_fs_tree_bytes += buf->len;
6158         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6159                 total_extent_tree_bytes += buf->len;
6160         if (!found_old_backref &&
6161             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6162             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6163             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6164                 found_old_backref = 1;
6165 out:
6166         free_extent_buffer(buf);
6167         return ret;
6168 }
6169
6170 static int add_root_to_pending(struct extent_buffer *buf,
6171                                struct cache_tree *extent_cache,
6172                                struct cache_tree *pending,
6173                                struct cache_tree *seen,
6174                                struct cache_tree *nodes,
6175                                u64 objectid)
6176 {
6177         if (btrfs_header_level(buf) > 0)
6178                 add_pending(nodes, seen, buf->start, buf->len);
6179         else
6180                 add_pending(pending, seen, buf->start, buf->len);
6181         add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6182                        0, 1, 1, 0, 1, 0, buf->len);
6183
6184         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6185             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6186                 add_tree_backref(extent_cache, buf->start, buf->start,
6187                                  0, 1);
6188         else
6189                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6190         return 0;
6191 }
6192
6193 /* as we fix the tree, we might be deleting blocks that
6194  * we're tracking for repair.  This hook makes sure we
6195  * remove any backrefs for blocks as we are fixing them.
6196  */
6197 static int free_extent_hook(struct btrfs_trans_handle *trans,
6198                             struct btrfs_root *root,
6199                             u64 bytenr, u64 num_bytes, u64 parent,
6200                             u64 root_objectid, u64 owner, u64 offset,
6201                             int refs_to_drop)
6202 {
6203         struct extent_record *rec;
6204         struct cache_extent *cache;
6205         int is_data;
6206         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6207
6208         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6209         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6210         if (!cache)
6211                 return 0;
6212
6213         rec = container_of(cache, struct extent_record, cache);
6214         if (is_data) {
6215                 struct data_backref *back;
6216                 back = find_data_backref(rec, parent, root_objectid, owner,
6217                                          offset, 1, bytenr, num_bytes);
6218                 if (!back)
6219                         goto out;
6220                 if (back->node.found_ref) {
6221                         back->found_ref -= refs_to_drop;
6222                         if (rec->refs)
6223                                 rec->refs -= refs_to_drop;
6224                 }
6225                 if (back->node.found_extent_tree) {
6226                         back->num_refs -= refs_to_drop;
6227                         if (rec->extent_item_refs)
6228                                 rec->extent_item_refs -= refs_to_drop;
6229                 }
6230                 if (back->found_ref == 0)
6231                         back->node.found_ref = 0;
6232                 if (back->num_refs == 0)
6233                         back->node.found_extent_tree = 0;
6234
6235                 if (!back->node.found_extent_tree && back->node.found_ref) {
6236                         list_del(&back->node.list);
6237                         free(back);
6238                 }
6239         } else {
6240                 struct tree_backref *back;
6241                 back = find_tree_backref(rec, parent, root_objectid);
6242                 if (!back)
6243                         goto out;
6244                 if (back->node.found_ref) {
6245                         if (rec->refs)
6246                                 rec->refs--;
6247                         back->node.found_ref = 0;
6248                 }
6249                 if (back->node.found_extent_tree) {
6250                         if (rec->extent_item_refs)
6251                                 rec->extent_item_refs--;
6252                         back->node.found_extent_tree = 0;
6253                 }
6254                 if (!back->node.found_extent_tree && back->node.found_ref) {
6255                         list_del(&back->node.list);
6256                         free(back);
6257                 }
6258         }
6259         maybe_free_extent_rec(extent_cache, rec);
6260 out:
6261         return 0;
6262 }
6263
6264 static int delete_extent_records(struct btrfs_trans_handle *trans,
6265                                  struct btrfs_root *root,
6266                                  struct btrfs_path *path,
6267                                  u64 bytenr, u64 new_len)
6268 {
6269         struct btrfs_key key;
6270         struct btrfs_key found_key;
6271         struct extent_buffer *leaf;
6272         int ret;
6273         int slot;
6274
6275
6276         key.objectid = bytenr;
6277         key.type = (u8)-1;
6278         key.offset = (u64)-1;
6279
6280         while(1) {
6281                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6282                                         &key, path, 0, 1);
6283                 if (ret < 0)
6284                         break;
6285
6286                 if (ret > 0) {
6287                         ret = 0;
6288                         if (path->slots[0] == 0)
6289                                 break;
6290                         path->slots[0]--;
6291                 }
6292                 ret = 0;
6293
6294                 leaf = path->nodes[0];
6295                 slot = path->slots[0];
6296
6297                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6298                 if (found_key.objectid != bytenr)
6299                         break;
6300
6301                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6302                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6303                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6304                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6305                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6306                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6307                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6308                         btrfs_release_path(path);
6309                         if (found_key.type == 0) {
6310                                 if (found_key.offset == 0)
6311                                         break;
6312                                 key.offset = found_key.offset - 1;
6313                                 key.type = found_key.type;
6314                         }
6315                         key.type = found_key.type - 1;
6316                         key.offset = (u64)-1;
6317                         continue;
6318                 }
6319
6320                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6321                         found_key.objectid, found_key.type, found_key.offset);
6322
6323                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6324                 if (ret)
6325                         break;
6326                 btrfs_release_path(path);
6327
6328                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6329                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6330                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6331                                 found_key.offset : root->leafsize;
6332
6333                         ret = btrfs_update_block_group(trans, root, bytenr,
6334                                                        bytes, 0, 0);
6335                         if (ret)
6336                                 break;
6337                 }
6338         }
6339
6340         btrfs_release_path(path);
6341         return ret;
6342 }
6343
6344 /*
6345  * for a single backref, this will allocate a new extent
6346  * and add the backref to it.
6347  */
6348 static int record_extent(struct btrfs_trans_handle *trans,
6349                          struct btrfs_fs_info *info,
6350                          struct btrfs_path *path,
6351                          struct extent_record *rec,
6352                          struct extent_backref *back,
6353                          int allocated, u64 flags)
6354 {
6355         int ret;
6356         struct btrfs_root *extent_root = info->extent_root;
6357         struct extent_buffer *leaf;
6358         struct btrfs_key ins_key;
6359         struct btrfs_extent_item *ei;
6360         struct tree_backref *tback;
6361         struct data_backref *dback;
6362         struct btrfs_tree_block_info *bi;
6363
6364         if (!back->is_data)
6365                 rec->max_size = max_t(u64, rec->max_size,
6366                                     info->extent_root->leafsize);
6367
6368         if (!allocated) {
6369                 u32 item_size = sizeof(*ei);
6370
6371                 if (!back->is_data)
6372                         item_size += sizeof(*bi);
6373
6374                 ins_key.objectid = rec->start;
6375                 ins_key.offset = rec->max_size;
6376                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6377
6378                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6379                                         &ins_key, item_size);
6380                 if (ret)
6381                         goto fail;
6382
6383                 leaf = path->nodes[0];
6384                 ei = btrfs_item_ptr(leaf, path->slots[0],
6385                                     struct btrfs_extent_item);
6386
6387                 btrfs_set_extent_refs(leaf, ei, 0);
6388                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6389
6390                 if (back->is_data) {
6391                         btrfs_set_extent_flags(leaf, ei,
6392                                                BTRFS_EXTENT_FLAG_DATA);
6393                 } else {
6394                         struct btrfs_disk_key copy_key;;
6395
6396                         tback = (struct tree_backref *)back;
6397                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6398                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6399                                              sizeof(*bi));
6400
6401                         btrfs_set_disk_key_objectid(&copy_key,
6402                                                     rec->info_objectid);
6403                         btrfs_set_disk_key_type(&copy_key, 0);
6404                         btrfs_set_disk_key_offset(&copy_key, 0);
6405
6406                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6407                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6408
6409                         btrfs_set_extent_flags(leaf, ei,
6410                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6411                 }
6412
6413                 btrfs_mark_buffer_dirty(leaf);
6414                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6415                                                rec->max_size, 1, 0);
6416                 if (ret)
6417                         goto fail;
6418                 btrfs_release_path(path);
6419         }
6420
6421         if (back->is_data) {
6422                 u64 parent;
6423                 int i;
6424
6425                 dback = (struct data_backref *)back;
6426                 if (back->full_backref)
6427                         parent = dback->parent;
6428                 else
6429                         parent = 0;
6430
6431                 for (i = 0; i < dback->found_ref; i++) {
6432                         /* if parent != 0, we're doing a full backref
6433                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6434                          * just makes the backref allocator create a data
6435                          * backref
6436                          */
6437                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6438                                                    rec->start, rec->max_size,
6439                                                    parent,
6440                                                    dback->root,
6441                                                    parent ?
6442                                                    BTRFS_FIRST_FREE_OBJECTID :
6443                                                    dback->owner,
6444                                                    dback->offset);
6445                         if (ret)
6446                                 break;
6447                 }
6448                 fprintf(stderr, "adding new data backref"
6449                                 " on %llu %s %llu owner %llu"
6450                                 " offset %llu found %d\n",
6451                                 (unsigned long long)rec->start,
6452                                 back->full_backref ?
6453                                 "parent" : "root",
6454                                 back->full_backref ?
6455                                 (unsigned long long)parent :
6456                                 (unsigned long long)dback->root,
6457                                 (unsigned long long)dback->owner,
6458                                 (unsigned long long)dback->offset,
6459                                 dback->found_ref);
6460         } else {
6461                 u64 parent;
6462
6463                 tback = (struct tree_backref *)back;
6464                 if (back->full_backref)
6465                         parent = tback->parent;
6466                 else
6467                         parent = 0;
6468
6469                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6470                                            rec->start, rec->max_size,
6471                                            parent, tback->root, 0, 0);
6472                 fprintf(stderr, "adding new tree backref on "
6473                         "start %llu len %llu parent %llu root %llu\n",
6474                         rec->start, rec->max_size, parent, tback->root);
6475         }
6476 fail:
6477         btrfs_release_path(path);
6478         return ret;
6479 }
6480
6481 struct extent_entry {
6482         u64 bytenr;
6483         u64 bytes;
6484         int count;
6485         int broken;
6486         struct list_head list;
6487 };
6488
6489 static struct extent_entry *find_entry(struct list_head *entries,
6490                                        u64 bytenr, u64 bytes)
6491 {
6492         struct extent_entry *entry = NULL;
6493
6494         list_for_each_entry(entry, entries, list) {
6495                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6496                         return entry;
6497         }
6498
6499         return NULL;
6500 }
6501
6502 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6503 {
6504         struct extent_entry *entry, *best = NULL, *prev = NULL;
6505
6506         list_for_each_entry(entry, entries, list) {
6507                 if (!prev) {
6508                         prev = entry;
6509                         continue;
6510                 }
6511
6512                 /*
6513                  * If there are as many broken entries as entries then we know
6514                  * not to trust this particular entry.
6515                  */
6516                 if (entry->broken == entry->count)
6517                         continue;
6518
6519                 /*
6520                  * If our current entry == best then we can't be sure our best
6521                  * is really the best, so we need to keep searching.
6522                  */
6523                 if (best && best->count == entry->count) {
6524                         prev = entry;
6525                         best = NULL;
6526                         continue;
6527                 }
6528
6529                 /* Prev == entry, not good enough, have to keep searching */
6530                 if (!prev->broken && prev->count == entry->count)
6531                         continue;
6532
6533                 if (!best)
6534                         best = (prev->count > entry->count) ? prev : entry;
6535                 else if (best->count < entry->count)
6536                         best = entry;
6537                 prev = entry;
6538         }
6539
6540         return best;
6541 }
6542
6543 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6544                       struct data_backref *dback, struct extent_entry *entry)
6545 {
6546         struct btrfs_trans_handle *trans;
6547         struct btrfs_root *root;
6548         struct btrfs_file_extent_item *fi;
6549         struct extent_buffer *leaf;
6550         struct btrfs_key key;
6551         u64 bytenr, bytes;
6552         int ret, err;
6553
6554         key.objectid = dback->root;
6555         key.type = BTRFS_ROOT_ITEM_KEY;
6556         key.offset = (u64)-1;
6557         root = btrfs_read_fs_root(info, &key);
6558         if (IS_ERR(root)) {
6559                 fprintf(stderr, "Couldn't find root for our ref\n");
6560                 return -EINVAL;
6561         }
6562
6563         /*
6564          * The backref points to the original offset of the extent if it was
6565          * split, so we need to search down to the offset we have and then walk
6566          * forward until we find the backref we're looking for.
6567          */
6568         key.objectid = dback->owner;
6569         key.type = BTRFS_EXTENT_DATA_KEY;
6570         key.offset = dback->offset;
6571         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6572         if (ret < 0) {
6573                 fprintf(stderr, "Error looking up ref %d\n", ret);
6574                 return ret;
6575         }
6576
6577         while (1) {
6578                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6579                         ret = btrfs_next_leaf(root, path);
6580                         if (ret) {
6581                                 fprintf(stderr, "Couldn't find our ref, next\n");
6582                                 return -EINVAL;
6583                         }
6584                 }
6585                 leaf = path->nodes[0];
6586                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6587                 if (key.objectid != dback->owner ||
6588                     key.type != BTRFS_EXTENT_DATA_KEY) {
6589                         fprintf(stderr, "Couldn't find our ref, search\n");
6590                         return -EINVAL;
6591                 }
6592                 fi = btrfs_item_ptr(leaf, path->slots[0],
6593                                     struct btrfs_file_extent_item);
6594                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6595                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6596
6597                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6598                         break;
6599                 path->slots[0]++;
6600         }
6601
6602         btrfs_release_path(path);
6603
6604         trans = btrfs_start_transaction(root, 1);
6605         if (IS_ERR(trans))
6606                 return PTR_ERR(trans);
6607
6608         /*
6609          * Ok we have the key of the file extent we want to fix, now we can cow
6610          * down to the thing and fix it.
6611          */
6612         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6613         if (ret < 0) {
6614                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6615                         key.objectid, key.type, key.offset, ret);
6616                 goto out;
6617         }
6618         if (ret > 0) {
6619                 fprintf(stderr, "Well that's odd, we just found this key "
6620                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6621                         key.offset);
6622                 ret = -EINVAL;
6623                 goto out;
6624         }
6625         leaf = path->nodes[0];
6626         fi = btrfs_item_ptr(leaf, path->slots[0],
6627                             struct btrfs_file_extent_item);
6628
6629         if (btrfs_file_extent_compression(leaf, fi) &&
6630             dback->disk_bytenr != entry->bytenr) {
6631                 fprintf(stderr, "Ref doesn't match the record start and is "
6632                         "compressed, please take a btrfs-image of this file "
6633                         "system and send it to a btrfs developer so they can "
6634                         "complete this functionality for bytenr %Lu\n",
6635                         dback->disk_bytenr);
6636                 ret = -EINVAL;
6637                 goto out;
6638         }
6639
6640         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6641                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6642         } else if (dback->disk_bytenr > entry->bytenr) {
6643                 u64 off_diff, offset;
6644
6645                 off_diff = dback->disk_bytenr - entry->bytenr;
6646                 offset = btrfs_file_extent_offset(leaf, fi);
6647                 if (dback->disk_bytenr + offset +
6648                     btrfs_file_extent_num_bytes(leaf, fi) >
6649                     entry->bytenr + entry->bytes) {
6650                         fprintf(stderr, "Ref is past the entry end, please "
6651                                 "take a btrfs-image of this file system and "
6652                                 "send it to a btrfs developer, ref %Lu\n",
6653                                 dback->disk_bytenr);
6654                         ret = -EINVAL;
6655                         goto out;
6656                 }
6657                 offset += off_diff;
6658                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6659                 btrfs_set_file_extent_offset(leaf, fi, offset);
6660         } else if (dback->disk_bytenr < entry->bytenr) {
6661                 u64 offset;
6662
6663                 offset = btrfs_file_extent_offset(leaf, fi);
6664                 if (dback->disk_bytenr + offset < entry->bytenr) {
6665                         fprintf(stderr, "Ref is before the entry start, please"
6666                                 " take a btrfs-image of this file system and "
6667                                 "send it to a btrfs developer, ref %Lu\n",
6668                                 dback->disk_bytenr);
6669                         ret = -EINVAL;
6670                         goto out;
6671                 }
6672
6673                 offset += dback->disk_bytenr;
6674                 offset -= entry->bytenr;
6675                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6676                 btrfs_set_file_extent_offset(leaf, fi, offset);
6677         }
6678
6679         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6680
6681         /*
6682          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6683          * only do this if we aren't using compression, otherwise it's a
6684          * trickier case.
6685          */
6686         if (!btrfs_file_extent_compression(leaf, fi))
6687                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6688         else
6689                 printf("ram bytes may be wrong?\n");
6690         btrfs_mark_buffer_dirty(leaf);
6691 out:
6692         err = btrfs_commit_transaction(trans, root);
6693         btrfs_release_path(path);
6694         return ret ? ret : err;
6695 }
6696
6697 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6698                            struct extent_record *rec)
6699 {
6700         struct extent_backref *back;
6701         struct data_backref *dback;
6702         struct extent_entry *entry, *best = NULL;
6703         LIST_HEAD(entries);
6704         int nr_entries = 0;
6705         int broken_entries = 0;
6706         int ret = 0;
6707         short mismatch = 0;
6708
6709         /*
6710          * Metadata is easy and the backrefs should always agree on bytenr and
6711          * size, if not we've got bigger issues.
6712          */
6713         if (rec->metadata)
6714                 return 0;
6715
6716         list_for_each_entry(back, &rec->backrefs, list) {
6717                 if (back->full_backref || !back->is_data)
6718                         continue;
6719
6720                 dback = (struct data_backref *)back;
6721
6722                 /*
6723                  * We only pay attention to backrefs that we found a real
6724                  * backref for.
6725                  */
6726                 if (dback->found_ref == 0)
6727                         continue;
6728
6729                 /*
6730                  * For now we only catch when the bytes don't match, not the
6731                  * bytenr.  We can easily do this at the same time, but I want
6732                  * to have a fs image to test on before we just add repair
6733                  * functionality willy-nilly so we know we won't screw up the
6734                  * repair.
6735                  */
6736
6737                 entry = find_entry(&entries, dback->disk_bytenr,
6738                                    dback->bytes);
6739                 if (!entry) {
6740                         entry = malloc(sizeof(struct extent_entry));
6741                         if (!entry) {
6742                                 ret = -ENOMEM;
6743                                 goto out;
6744                         }
6745                         memset(entry, 0, sizeof(*entry));
6746                         entry->bytenr = dback->disk_bytenr;
6747                         entry->bytes = dback->bytes;
6748                         list_add_tail(&entry->list, &entries);
6749                         nr_entries++;
6750                 }
6751
6752                 /*
6753                  * If we only have on entry we may think the entries agree when
6754                  * in reality they don't so we have to do some extra checking.
6755                  */
6756                 if (dback->disk_bytenr != rec->start ||
6757                     dback->bytes != rec->nr || back->broken)
6758                         mismatch = 1;
6759
6760                 if (back->broken) {
6761                         entry->broken++;
6762                         broken_entries++;
6763                 }
6764
6765                 entry->count++;
6766         }
6767
6768         /* Yay all the backrefs agree, carry on good sir */
6769         if (nr_entries <= 1 && !mismatch)
6770                 goto out;
6771
6772         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6773                 "%Lu\n", rec->start);
6774
6775         /*
6776          * First we want to see if the backrefs can agree amongst themselves who
6777          * is right, so figure out which one of the entries has the highest
6778          * count.
6779          */
6780         best = find_most_right_entry(&entries);
6781
6782         /*
6783          * Ok so we may have an even split between what the backrefs think, so
6784          * this is where we use the extent ref to see what it thinks.
6785          */
6786         if (!best) {
6787                 entry = find_entry(&entries, rec->start, rec->nr);
6788                 if (!entry && (!broken_entries || !rec->found_rec)) {
6789                         fprintf(stderr, "Backrefs don't agree with each other "
6790                                 "and extent record doesn't agree with anybody,"
6791                                 " so we can't fix bytenr %Lu bytes %Lu\n",
6792                                 rec->start, rec->nr);
6793                         ret = -EINVAL;
6794                         goto out;
6795                 } else if (!entry) {
6796                         /*
6797                          * Ok our backrefs were broken, we'll assume this is the
6798                          * correct value and add an entry for this range.
6799                          */
6800                         entry = malloc(sizeof(struct extent_entry));
6801                         if (!entry) {
6802                                 ret = -ENOMEM;
6803                                 goto out;
6804                         }
6805                         memset(entry, 0, sizeof(*entry));
6806                         entry->bytenr = rec->start;
6807                         entry->bytes = rec->nr;
6808                         list_add_tail(&entry->list, &entries);
6809                         nr_entries++;
6810                 }
6811                 entry->count++;
6812                 best = find_most_right_entry(&entries);
6813                 if (!best) {
6814                         fprintf(stderr, "Backrefs and extent record evenly "
6815                                 "split on who is right, this is going to "
6816                                 "require user input to fix bytenr %Lu bytes "
6817                                 "%Lu\n", rec->start, rec->nr);
6818                         ret = -EINVAL;
6819                         goto out;
6820                 }
6821         }
6822
6823         /*
6824          * I don't think this can happen currently as we'll abort() if we catch
6825          * this case higher up, but in case somebody removes that we still can't
6826          * deal with it properly here yet, so just bail out of that's the case.
6827          */
6828         if (best->bytenr != rec->start) {
6829                 fprintf(stderr, "Extent start and backref starts don't match, "
6830                         "please use btrfs-image on this file system and send "
6831                         "it to a btrfs developer so they can make fsck fix "
6832                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
6833                         rec->start, rec->nr);
6834                 ret = -EINVAL;
6835                 goto out;
6836         }
6837
6838         /*
6839          * Ok great we all agreed on an extent record, let's go find the real
6840          * references and fix up the ones that don't match.
6841          */
6842         list_for_each_entry(back, &rec->backrefs, list) {
6843                 if (back->full_backref || !back->is_data)
6844                         continue;
6845
6846                 dback = (struct data_backref *)back;
6847
6848                 /*
6849                  * Still ignoring backrefs that don't have a real ref attached
6850                  * to them.
6851                  */
6852                 if (dback->found_ref == 0)
6853                         continue;
6854
6855                 if (dback->bytes == best->bytes &&
6856                     dback->disk_bytenr == best->bytenr)
6857                         continue;
6858
6859                 ret = repair_ref(info, path, dback, best);
6860                 if (ret)
6861                         goto out;
6862         }
6863
6864         /*
6865          * Ok we messed with the actual refs, which means we need to drop our
6866          * entire cache and go back and rescan.  I know this is a huge pain and
6867          * adds a lot of extra work, but it's the only way to be safe.  Once all
6868          * the backrefs agree we may not need to do anything to the extent
6869          * record itself.
6870          */
6871         ret = -EAGAIN;
6872 out:
6873         while (!list_empty(&entries)) {
6874                 entry = list_entry(entries.next, struct extent_entry, list);
6875                 list_del_init(&entry->list);
6876                 free(entry);
6877         }
6878         return ret;
6879 }
6880
6881 static int process_duplicates(struct btrfs_root *root,
6882                               struct cache_tree *extent_cache,
6883                               struct extent_record *rec)
6884 {
6885         struct extent_record *good, *tmp;
6886         struct cache_extent *cache;
6887         int ret;
6888
6889         /*
6890          * If we found a extent record for this extent then return, or if we
6891          * have more than one duplicate we are likely going to need to delete
6892          * something.
6893          */
6894         if (rec->found_rec || rec->num_duplicates > 1)
6895                 return 0;
6896
6897         /* Shouldn't happen but just in case */
6898         BUG_ON(!rec->num_duplicates);
6899
6900         /*
6901          * So this happens if we end up with a backref that doesn't match the
6902          * actual extent entry.  So either the backref is bad or the extent
6903          * entry is bad.  Either way we want to have the extent_record actually
6904          * reflect what we found in the extent_tree, so we need to take the
6905          * duplicate out and use that as the extent_record since the only way we
6906          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6907          */
6908         remove_cache_extent(extent_cache, &rec->cache);
6909
6910         good = list_entry(rec->dups.next, struct extent_record, list);
6911         list_del_init(&good->list);
6912         INIT_LIST_HEAD(&good->backrefs);
6913         INIT_LIST_HEAD(&good->dups);
6914         good->cache.start = good->start;
6915         good->cache.size = good->nr;
6916         good->content_checked = 0;
6917         good->owner_ref_checked = 0;
6918         good->num_duplicates = 0;
6919         good->refs = rec->refs;
6920         list_splice_init(&rec->backrefs, &good->backrefs);
6921         while (1) {
6922                 cache = lookup_cache_extent(extent_cache, good->start,
6923                                             good->nr);
6924                 if (!cache)
6925                         break;
6926                 tmp = container_of(cache, struct extent_record, cache);
6927
6928                 /*
6929                  * If we find another overlapping extent and it's found_rec is
6930                  * set then it's a duplicate and we need to try and delete
6931                  * something.
6932                  */
6933                 if (tmp->found_rec || tmp->num_duplicates > 0) {
6934                         if (list_empty(&good->list))
6935                                 list_add_tail(&good->list,
6936                                               &duplicate_extents);
6937                         good->num_duplicates += tmp->num_duplicates + 1;
6938                         list_splice_init(&tmp->dups, &good->dups);
6939                         list_del_init(&tmp->list);
6940                         list_add_tail(&tmp->list, &good->dups);
6941                         remove_cache_extent(extent_cache, &tmp->cache);
6942                         continue;
6943                 }
6944
6945                 /*
6946                  * Ok we have another non extent item backed extent rec, so lets
6947                  * just add it to this extent and carry on like we did above.
6948                  */
6949                 good->refs += tmp->refs;
6950                 list_splice_init(&tmp->backrefs, &good->backrefs);
6951                 remove_cache_extent(extent_cache, &tmp->cache);
6952                 free(tmp);
6953         }
6954         ret = insert_cache_extent(extent_cache, &good->cache);
6955         BUG_ON(ret);
6956         free(rec);
6957         return good->num_duplicates ? 0 : 1;
6958 }
6959
6960 static int delete_duplicate_records(struct btrfs_root *root,
6961                                     struct extent_record *rec)
6962 {
6963         struct btrfs_trans_handle *trans;
6964         LIST_HEAD(delete_list);
6965         struct btrfs_path *path;
6966         struct extent_record *tmp, *good, *n;
6967         int nr_del = 0;
6968         int ret = 0, err;
6969         struct btrfs_key key;
6970
6971         path = btrfs_alloc_path();
6972         if (!path) {
6973                 ret = -ENOMEM;
6974                 goto out;
6975         }
6976
6977         good = rec;
6978         /* Find the record that covers all of the duplicates. */
6979         list_for_each_entry(tmp, &rec->dups, list) {
6980                 if (good->start < tmp->start)
6981                         continue;
6982                 if (good->nr > tmp->nr)
6983                         continue;
6984
6985                 if (tmp->start + tmp->nr < good->start + good->nr) {
6986                         fprintf(stderr, "Ok we have overlapping extents that "
6987                                 "aren't completely covered by eachother, this "
6988                                 "is going to require more careful thought.  "
6989                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
6990                                 tmp->start, tmp->nr, good->start, good->nr);
6991                         abort();
6992                 }
6993                 good = tmp;
6994         }
6995
6996         if (good != rec)
6997                 list_add_tail(&rec->list, &delete_list);
6998
6999         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7000                 if (tmp == good)
7001                         continue;
7002                 list_move_tail(&tmp->list, &delete_list);
7003         }
7004
7005         root = root->fs_info->extent_root;
7006         trans = btrfs_start_transaction(root, 1);
7007         if (IS_ERR(trans)) {
7008                 ret = PTR_ERR(trans);
7009                 goto out;
7010         }
7011
7012         list_for_each_entry(tmp, &delete_list, list) {
7013                 if (tmp->found_rec == 0)
7014                         continue;
7015                 key.objectid = tmp->start;
7016                 key.type = BTRFS_EXTENT_ITEM_KEY;
7017                 key.offset = tmp->nr;
7018
7019                 /* Shouldn't happen but just in case */
7020                 if (tmp->metadata) {
7021                         fprintf(stderr, "Well this shouldn't happen, extent "
7022                                 "record overlaps but is metadata? "
7023                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7024                         abort();
7025                 }
7026
7027                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7028                 if (ret) {
7029                         if (ret > 0)
7030                                 ret = -EINVAL;
7031                         break;
7032                 }
7033                 ret = btrfs_del_item(trans, root, path);
7034                 if (ret)
7035                         break;
7036                 btrfs_release_path(path);
7037                 nr_del++;
7038         }
7039         err = btrfs_commit_transaction(trans, root);
7040         if (err && !ret)
7041                 ret = err;
7042 out:
7043         while (!list_empty(&delete_list)) {
7044                 tmp = list_entry(delete_list.next, struct extent_record, list);
7045                 list_del_init(&tmp->list);
7046                 if (tmp == rec)
7047                         continue;
7048                 free(tmp);
7049         }
7050
7051         while (!list_empty(&rec->dups)) {
7052                 tmp = list_entry(rec->dups.next, struct extent_record, list);
7053                 list_del_init(&tmp->list);
7054                 free(tmp);
7055         }
7056
7057         btrfs_free_path(path);
7058
7059         if (!ret && !nr_del)
7060                 rec->num_duplicates = 0;
7061
7062         return ret ? ret : nr_del;
7063 }
7064
7065 static int find_possible_backrefs(struct btrfs_fs_info *info,
7066                                   struct btrfs_path *path,
7067                                   struct cache_tree *extent_cache,
7068                                   struct extent_record *rec)
7069 {
7070         struct btrfs_root *root;
7071         struct extent_backref *back;
7072         struct data_backref *dback;
7073         struct cache_extent *cache;
7074         struct btrfs_file_extent_item *fi;
7075         struct btrfs_key key;
7076         u64 bytenr, bytes;
7077         int ret;
7078
7079         list_for_each_entry(back, &rec->backrefs, list) {
7080                 /* Don't care about full backrefs (poor unloved backrefs) */
7081                 if (back->full_backref || !back->is_data)
7082                         continue;
7083
7084                 dback = (struct data_backref *)back;
7085
7086                 /* We found this one, we don't need to do a lookup */
7087                 if (dback->found_ref)
7088                         continue;
7089
7090                 key.objectid = dback->root;
7091                 key.type = BTRFS_ROOT_ITEM_KEY;
7092                 key.offset = (u64)-1;
7093
7094                 root = btrfs_read_fs_root(info, &key);
7095
7096                 /* No root, definitely a bad ref, skip */
7097                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7098                         continue;
7099                 /* Other err, exit */
7100                 if (IS_ERR(root))
7101                         return PTR_ERR(root);
7102
7103                 key.objectid = dback->owner;
7104                 key.type = BTRFS_EXTENT_DATA_KEY;
7105                 key.offset = dback->offset;
7106                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7107                 if (ret) {
7108                         btrfs_release_path(path);
7109                         if (ret < 0)
7110                                 return ret;
7111                         /* Didn't find it, we can carry on */
7112                         ret = 0;
7113                         continue;
7114                 }
7115
7116                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7117                                     struct btrfs_file_extent_item);
7118                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7119                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7120                 btrfs_release_path(path);
7121                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7122                 if (cache) {
7123                         struct extent_record *tmp;
7124                         tmp = container_of(cache, struct extent_record, cache);
7125
7126                         /*
7127                          * If we found an extent record for the bytenr for this
7128                          * particular backref then we can't add it to our
7129                          * current extent record.  We only want to add backrefs
7130                          * that don't have a corresponding extent item in the
7131                          * extent tree since they likely belong to this record
7132                          * and we need to fix it if it doesn't match bytenrs.
7133                          */
7134                         if  (tmp->found_rec)
7135                                 continue;
7136                 }
7137
7138                 dback->found_ref += 1;
7139                 dback->disk_bytenr = bytenr;
7140                 dback->bytes = bytes;
7141
7142                 /*
7143                  * Set this so the verify backref code knows not to trust the
7144                  * values in this backref.
7145                  */
7146                 back->broken = 1;
7147         }
7148
7149         return 0;
7150 }
7151
7152 /*
7153  * Record orphan data ref into corresponding root.
7154  *
7155  * Return 0 if the extent item contains data ref and recorded.
7156  * Return 1 if the extent item contains no useful data ref
7157  *   On that case, it may contains only shared_dataref or metadata backref
7158  *   or the file extent exists(this should be handled by the extent bytenr
7159  *   recovery routine)
7160  * Return <0 if something goes wrong.
7161  */
7162 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7163                                       struct extent_record *rec)
7164 {
7165         struct btrfs_key key;
7166         struct btrfs_root *dest_root;
7167         struct extent_backref *back;
7168         struct data_backref *dback;
7169         struct orphan_data_extent *orphan;
7170         struct btrfs_path *path;
7171         int recorded_data_ref = 0;
7172         int ret = 0;
7173
7174         if (rec->metadata)
7175                 return 1;
7176         path = btrfs_alloc_path();
7177         if (!path)
7178                 return -ENOMEM;
7179         list_for_each_entry(back, &rec->backrefs, list) {
7180                 if (back->full_backref || !back->is_data ||
7181                     !back->found_extent_tree)
7182                         continue;
7183                 dback = (struct data_backref *)back;
7184                 if (dback->found_ref)
7185                         continue;
7186                 key.objectid = dback->root;
7187                 key.type = BTRFS_ROOT_ITEM_KEY;
7188                 key.offset = (u64)-1;
7189
7190                 dest_root = btrfs_read_fs_root(fs_info, &key);
7191
7192                 /* For non-exist root we just skip it */
7193                 if (IS_ERR(dest_root) || !dest_root)
7194                         continue;
7195
7196                 key.objectid = dback->owner;
7197                 key.type = BTRFS_EXTENT_DATA_KEY;
7198                 key.offset = dback->offset;
7199
7200                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7201                 /*
7202                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7203                  * we need to record it for inode/file extent rebuild.
7204                  * For ret > 0, we record it only for file extent rebuild.
7205                  * For ret == 0, the file extent exists but only bytenr
7206                  * mismatch, let the original bytenr fix routine to handle,
7207                  * don't record it.
7208                  */
7209                 if (ret == 0)
7210                         continue;
7211                 ret = 0;
7212                 orphan = malloc(sizeof(*orphan));
7213                 if (!orphan) {
7214                         ret = -ENOMEM;
7215                         goto out;
7216                 }
7217                 INIT_LIST_HEAD(&orphan->list);
7218                 orphan->root = dback->root;
7219                 orphan->objectid = dback->owner;
7220                 orphan->offset = dback->offset;
7221                 orphan->disk_bytenr = rec->cache.start;
7222                 orphan->disk_len = rec->cache.size;
7223                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7224                 recorded_data_ref = 1;
7225         }
7226 out:
7227         btrfs_free_path(path);
7228         if (!ret)
7229                 return !recorded_data_ref;
7230         else
7231                 return ret;
7232 }
7233
7234 /*
7235  * when an incorrect extent item is found, this will delete
7236  * all of the existing entries for it and recreate them
7237  * based on what the tree scan found.
7238  */
7239 static int fixup_extent_refs(struct btrfs_fs_info *info,
7240                              struct cache_tree *extent_cache,
7241                              struct extent_record *rec)
7242 {
7243         struct btrfs_trans_handle *trans = NULL;
7244         int ret;
7245         struct btrfs_path *path;
7246         struct list_head *cur = rec->backrefs.next;
7247         struct cache_extent *cache;
7248         struct extent_backref *back;
7249         int allocated = 0;
7250         u64 flags = 0;
7251
7252         if (rec->flag_block_full_backref)
7253                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7254
7255         path = btrfs_alloc_path();
7256         if (!path)
7257                 return -ENOMEM;
7258
7259         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7260                 /*
7261                  * Sometimes the backrefs themselves are so broken they don't
7262                  * get attached to any meaningful rec, so first go back and
7263                  * check any of our backrefs that we couldn't find and throw
7264                  * them into the list if we find the backref so that
7265                  * verify_backrefs can figure out what to do.
7266                  */
7267                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7268                 if (ret < 0)
7269                         goto out;
7270         }
7271
7272         /* step one, make sure all of the backrefs agree */
7273         ret = verify_backrefs(info, path, rec);
7274         if (ret < 0)
7275                 goto out;
7276
7277         trans = btrfs_start_transaction(info->extent_root, 1);
7278         if (IS_ERR(trans)) {
7279                 ret = PTR_ERR(trans);
7280                 goto out;
7281         }
7282
7283         /* step two, delete all the existing records */
7284         ret = delete_extent_records(trans, info->extent_root, path,
7285                                     rec->start, rec->max_size);
7286
7287         if (ret < 0)
7288                 goto out;
7289
7290         /* was this block corrupt?  If so, don't add references to it */
7291         cache = lookup_cache_extent(info->corrupt_blocks,
7292                                     rec->start, rec->max_size);
7293         if (cache) {
7294                 ret = 0;
7295                 goto out;
7296         }
7297
7298         /* step three, recreate all the refs we did find */
7299         while(cur != &rec->backrefs) {
7300                 back = list_entry(cur, struct extent_backref, list);
7301                 cur = cur->next;
7302
7303                 /*
7304                  * if we didn't find any references, don't create a
7305                  * new extent record
7306                  */
7307                 if (!back->found_ref)
7308                         continue;
7309
7310                 rec->bad_full_backref = 0;
7311                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7312                 allocated = 1;
7313
7314                 if (ret)
7315                         goto out;
7316         }
7317 out:
7318         if (trans) {
7319                 int err = btrfs_commit_transaction(trans, info->extent_root);
7320                 if (!ret)
7321                         ret = err;
7322         }
7323
7324         btrfs_free_path(path);
7325         return ret;
7326 }
7327
7328 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7329                               struct extent_record *rec)
7330 {
7331         struct btrfs_trans_handle *trans;
7332         struct btrfs_root *root = fs_info->extent_root;
7333         struct btrfs_path *path;
7334         struct btrfs_extent_item *ei;
7335         struct btrfs_key key;
7336         u64 flags;
7337         int ret = 0;
7338
7339         key.objectid = rec->start;
7340         if (rec->metadata) {
7341                 key.type = BTRFS_METADATA_ITEM_KEY;
7342                 key.offset = rec->info_level;
7343         } else {
7344                 key.type = BTRFS_EXTENT_ITEM_KEY;
7345                 key.offset = rec->max_size;
7346         }
7347
7348         path = btrfs_alloc_path();
7349         if (!path)
7350                 return -ENOMEM;
7351
7352         trans = btrfs_start_transaction(root, 0);
7353         if (IS_ERR(trans)) {
7354                 btrfs_free_path(path);
7355                 return PTR_ERR(trans);
7356         }
7357
7358         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7359         if (ret < 0) {
7360                 btrfs_free_path(path);
7361                 btrfs_commit_transaction(trans, root);
7362                 return ret;
7363         } else if (ret) {
7364                 fprintf(stderr, "Didn't find extent for %llu\n",
7365                         (unsigned long long)rec->start);
7366                 btrfs_free_path(path);
7367                 btrfs_commit_transaction(trans, root);
7368                 return -ENOENT;
7369         }
7370
7371         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7372                             struct btrfs_extent_item);
7373         flags = btrfs_extent_flags(path->nodes[0], ei);
7374         if (rec->flag_block_full_backref) {
7375                 fprintf(stderr, "setting full backref on %llu\n",
7376                         (unsigned long long)key.objectid);
7377                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7378         } else {
7379                 fprintf(stderr, "clearing full backref on %llu\n",
7380                         (unsigned long long)key.objectid);
7381                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7382         }
7383         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7384         btrfs_mark_buffer_dirty(path->nodes[0]);
7385         btrfs_free_path(path);
7386         return btrfs_commit_transaction(trans, root);
7387 }
7388
7389 /* right now we only prune from the extent allocation tree */
7390 static int prune_one_block(struct btrfs_trans_handle *trans,
7391                            struct btrfs_fs_info *info,
7392                            struct btrfs_corrupt_block *corrupt)
7393 {
7394         int ret;
7395         struct btrfs_path path;
7396         struct extent_buffer *eb;
7397         u64 found;
7398         int slot;
7399         int nritems;
7400         int level = corrupt->level + 1;
7401
7402         btrfs_init_path(&path);
7403 again:
7404         /* we want to stop at the parent to our busted block */
7405         path.lowest_level = level;
7406
7407         ret = btrfs_search_slot(trans, info->extent_root,
7408                                 &corrupt->key, &path, -1, 1);
7409
7410         if (ret < 0)
7411                 goto out;
7412
7413         eb = path.nodes[level];
7414         if (!eb) {
7415                 ret = -ENOENT;
7416                 goto out;
7417         }
7418
7419         /*
7420          * hopefully the search gave us the block we want to prune,
7421          * lets try that first
7422          */
7423         slot = path.slots[level];
7424         found =  btrfs_node_blockptr(eb, slot);
7425         if (found == corrupt->cache.start)
7426                 goto del_ptr;
7427
7428         nritems = btrfs_header_nritems(eb);
7429
7430         /* the search failed, lets scan this node and hope we find it */
7431         for (slot = 0; slot < nritems; slot++) {
7432                 found =  btrfs_node_blockptr(eb, slot);
7433                 if (found == corrupt->cache.start)
7434                         goto del_ptr;
7435         }
7436         /*
7437          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7438          * to this block
7439          */
7440         if (eb == info->extent_root->node) {
7441                 ret = -ENOENT;
7442                 goto out;
7443         } else {
7444                 level++;
7445                 btrfs_release_path(&path);
7446                 goto again;
7447         }
7448
7449 del_ptr:
7450         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7451         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7452
7453 out:
7454         btrfs_release_path(&path);
7455         return ret;
7456 }
7457
7458 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7459 {
7460         struct btrfs_trans_handle *trans = NULL;
7461         struct cache_extent *cache;
7462         struct btrfs_corrupt_block *corrupt;
7463
7464         while (1) {
7465                 cache = search_cache_extent(info->corrupt_blocks, 0);
7466                 if (!cache)
7467                         break;
7468                 if (!trans) {
7469                         trans = btrfs_start_transaction(info->extent_root, 1);
7470                         if (IS_ERR(trans))
7471                                 return PTR_ERR(trans);
7472                 }
7473                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7474                 prune_one_block(trans, info, corrupt);
7475                 remove_cache_extent(info->corrupt_blocks, cache);
7476         }
7477         if (trans)
7478                 return btrfs_commit_transaction(trans, info->extent_root);
7479         return 0;
7480 }
7481
7482 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7483 {
7484         struct btrfs_block_group_cache *cache;
7485         u64 start, end;
7486         int ret;
7487
7488         while (1) {
7489                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7490                                             &start, &end, EXTENT_DIRTY);
7491                 if (ret)
7492                         break;
7493                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7494                                    GFP_NOFS);
7495         }
7496
7497         start = 0;
7498         while (1) {
7499                 cache = btrfs_lookup_first_block_group(fs_info, start);
7500                 if (!cache)
7501                         break;
7502                 if (cache->cached)
7503                         cache->cached = 0;
7504                 start = cache->key.objectid + cache->key.offset;
7505         }
7506 }
7507
7508 static int check_extent_refs(struct btrfs_root *root,
7509                              struct cache_tree *extent_cache)
7510 {
7511         struct extent_record *rec;
7512         struct cache_extent *cache;
7513         int err = 0;
7514         int ret = 0;
7515         int fixed = 0;
7516         int had_dups = 0;
7517         int recorded = 0;
7518
7519         if (repair) {
7520                 /*
7521                  * if we're doing a repair, we have to make sure
7522                  * we don't allocate from the problem extents.
7523                  * In the worst case, this will be all the
7524                  * extents in the FS
7525                  */
7526                 cache = search_cache_extent(extent_cache, 0);
7527                 while(cache) {
7528                         rec = container_of(cache, struct extent_record, cache);
7529                         set_extent_dirty(root->fs_info->excluded_extents,
7530                                          rec->start,
7531                                          rec->start + rec->max_size - 1,
7532                                          GFP_NOFS);
7533                         cache = next_cache_extent(cache);
7534                 }
7535
7536                 /* pin down all the corrupted blocks too */
7537                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7538                 while(cache) {
7539                         set_extent_dirty(root->fs_info->excluded_extents,
7540                                          cache->start,
7541                                          cache->start + cache->size - 1,
7542                                          GFP_NOFS);
7543                         cache = next_cache_extent(cache);
7544                 }
7545                 prune_corrupt_blocks(root->fs_info);
7546                 reset_cached_block_groups(root->fs_info);
7547         }
7548
7549         reset_cached_block_groups(root->fs_info);
7550
7551         /*
7552          * We need to delete any duplicate entries we find first otherwise we
7553          * could mess up the extent tree when we have backrefs that actually
7554          * belong to a different extent item and not the weird duplicate one.
7555          */
7556         while (repair && !list_empty(&duplicate_extents)) {
7557                 rec = list_entry(duplicate_extents.next, struct extent_record,
7558                                  list);
7559                 list_del_init(&rec->list);
7560
7561                 /* Sometimes we can find a backref before we find an actual
7562                  * extent, so we need to process it a little bit to see if there
7563                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7564                  * if this is a backref screwup.  If we need to delete stuff
7565                  * process_duplicates() will return 0, otherwise it will return
7566                  * 1 and we
7567                  */
7568                 if (process_duplicates(root, extent_cache, rec))
7569                         continue;
7570                 ret = delete_duplicate_records(root, rec);
7571                 if (ret < 0)
7572                         return ret;
7573                 /*
7574                  * delete_duplicate_records will return the number of entries
7575                  * deleted, so if it's greater than 0 then we know we actually
7576                  * did something and we need to remove.
7577                  */
7578                 if (ret)
7579                         had_dups = 1;
7580         }
7581
7582         if (had_dups)
7583                 return -EAGAIN;
7584
7585         while(1) {
7586                 int cur_err = 0;
7587
7588                 fixed = 0;
7589                 recorded = 0;
7590                 cache = search_cache_extent(extent_cache, 0);
7591                 if (!cache)
7592                         break;
7593                 rec = container_of(cache, struct extent_record, cache);
7594                 if (rec->num_duplicates) {
7595                         fprintf(stderr, "extent item %llu has multiple extent "
7596                                 "items\n", (unsigned long long)rec->start);
7597                         err = 1;
7598                         cur_err = 1;
7599                 }
7600
7601                 if (rec->refs != rec->extent_item_refs) {
7602                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7603                                 (unsigned long long)rec->start,
7604                                 (unsigned long long)rec->nr);
7605                         fprintf(stderr, "extent item %llu, found %llu\n",
7606                                 (unsigned long long)rec->extent_item_refs,
7607                                 (unsigned long long)rec->refs);
7608                         ret = record_orphan_data_extents(root->fs_info, rec);
7609                         if (ret < 0)
7610                                 goto repair_abort;
7611                         if (ret == 0) {
7612                                 recorded = 1;
7613                         } else {
7614                                 /*
7615                                  * we can't use the extent to repair file
7616                                  * extent, let the fallback method handle it.
7617                                  */
7618                                 if (!fixed && repair) {
7619                                         ret = fixup_extent_refs(
7620                                                         root->fs_info,
7621                                                         extent_cache, rec);
7622                                         if (ret)
7623                                                 goto repair_abort;
7624                                         fixed = 1;
7625                                 }
7626                         }
7627                         err = 1;
7628                         cur_err = 1;
7629                 }
7630                 if (all_backpointers_checked(rec, 1)) {
7631                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7632                                 (unsigned long long)rec->start,
7633                                 (unsigned long long)rec->nr);
7634
7635                         if (!fixed && !recorded && repair) {
7636                                 ret = fixup_extent_refs(root->fs_info,
7637                                                         extent_cache, rec);
7638                                 if (ret)
7639                                         goto repair_abort;
7640                                 fixed = 1;
7641                         }
7642                         cur_err = 1;
7643                         err = 1;
7644                 }
7645                 if (!rec->owner_ref_checked) {
7646                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7647                                 (unsigned long long)rec->start,
7648                                 (unsigned long long)rec->nr);
7649                         if (!fixed && !recorded && repair) {
7650                                 ret = fixup_extent_refs(root->fs_info,
7651                                                         extent_cache, rec);
7652                                 if (ret)
7653                                         goto repair_abort;
7654                                 fixed = 1;
7655                         }
7656                         err = 1;
7657                         cur_err = 1;
7658                 }
7659                 if (rec->bad_full_backref) {
7660                         fprintf(stderr, "bad full backref, on [%llu]\n",
7661                                 (unsigned long long)rec->start);
7662                         if (repair) {
7663                                 ret = fixup_extent_flags(root->fs_info, rec);
7664                                 if (ret)
7665                                         goto repair_abort;
7666                                 fixed = 1;
7667                         }
7668                         err = 1;
7669                         cur_err = 1;
7670                 }
7671                 /*
7672                  * Although it's not a extent ref's problem, we reuse this
7673                  * routine for error reporting.
7674                  * No repair function yet.
7675                  */
7676                 if (rec->crossing_stripes) {
7677                         fprintf(stderr,
7678                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7679                                 rec->start, rec->start + rec->max_size);
7680                         err = 1;
7681                         cur_err = 1;
7682                 }
7683
7684                 if (rec->wrong_chunk_type) {
7685                         fprintf(stderr,
7686                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7687                                 rec->start, rec->start + rec->max_size);
7688                         err = 1;
7689                         cur_err = 1;
7690                 }
7691
7692                 remove_cache_extent(extent_cache, cache);
7693                 free_all_extent_backrefs(rec);
7694                 if (!init_extent_tree && repair && (!cur_err || fixed))
7695                         clear_extent_dirty(root->fs_info->excluded_extents,
7696                                            rec->start,
7697                                            rec->start + rec->max_size - 1,
7698                                            GFP_NOFS);
7699                 free(rec);
7700         }
7701 repair_abort:
7702         if (repair) {
7703                 if (ret && ret != -EAGAIN) {
7704                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7705                         exit(1);
7706                 } else if (!ret) {
7707                         struct btrfs_trans_handle *trans;
7708
7709                         root = root->fs_info->extent_root;
7710                         trans = btrfs_start_transaction(root, 1);
7711                         if (IS_ERR(trans)) {
7712                                 ret = PTR_ERR(trans);
7713                                 goto repair_abort;
7714                         }
7715
7716                         btrfs_fix_block_accounting(trans, root);
7717                         ret = btrfs_commit_transaction(trans, root);
7718                         if (ret)
7719                                 goto repair_abort;
7720                 }
7721                 if (err)
7722                         fprintf(stderr, "repaired damaged extent references\n");
7723                 return ret;
7724         }
7725         return err;
7726 }
7727
7728 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7729 {
7730         u64 stripe_size;
7731
7732         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7733                 stripe_size = length;
7734                 stripe_size /= num_stripes;
7735         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7736                 stripe_size = length * 2;
7737                 stripe_size /= num_stripes;
7738         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7739                 stripe_size = length;
7740                 stripe_size /= (num_stripes - 1);
7741         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7742                 stripe_size = length;
7743                 stripe_size /= (num_stripes - 2);
7744         } else {
7745                 stripe_size = length;
7746         }
7747         return stripe_size;
7748 }
7749
7750 /*
7751  * Check the chunk with its block group/dev list ref:
7752  * Return 0 if all refs seems valid.
7753  * Return 1 if part of refs seems valid, need later check for rebuild ref
7754  * like missing block group and needs to search extent tree to rebuild them.
7755  * Return -1 if essential refs are missing and unable to rebuild.
7756  */
7757 static int check_chunk_refs(struct chunk_record *chunk_rec,
7758                             struct block_group_tree *block_group_cache,
7759                             struct device_extent_tree *dev_extent_cache,
7760                             int silent)
7761 {
7762         struct cache_extent *block_group_item;
7763         struct block_group_record *block_group_rec;
7764         struct cache_extent *dev_extent_item;
7765         struct device_extent_record *dev_extent_rec;
7766         u64 devid;
7767         u64 offset;
7768         u64 length;
7769         int metadump_v2 = 0;
7770         int i;
7771         int ret = 0;
7772
7773         block_group_item = lookup_cache_extent(&block_group_cache->tree,
7774                                                chunk_rec->offset,
7775                                                chunk_rec->length);
7776         if (block_group_item) {
7777                 block_group_rec = container_of(block_group_item,
7778                                                struct block_group_record,
7779                                                cache);
7780                 if (chunk_rec->length != block_group_rec->offset ||
7781                     chunk_rec->offset != block_group_rec->objectid ||
7782                     (!metadump_v2 &&
7783                      chunk_rec->type_flags != block_group_rec->flags)) {
7784                         if (!silent)
7785                                 fprintf(stderr,
7786                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7787                                         chunk_rec->objectid,
7788                                         chunk_rec->type,
7789                                         chunk_rec->offset,
7790                                         chunk_rec->length,
7791                                         chunk_rec->offset,
7792                                         chunk_rec->type_flags,
7793                                         block_group_rec->objectid,
7794                                         block_group_rec->type,
7795                                         block_group_rec->offset,
7796                                         block_group_rec->offset,
7797                                         block_group_rec->objectid,
7798                                         block_group_rec->flags);
7799                         ret = -1;
7800                 } else {
7801                         list_del_init(&block_group_rec->list);
7802                         chunk_rec->bg_rec = block_group_rec;
7803                 }
7804         } else {
7805                 if (!silent)
7806                         fprintf(stderr,
7807                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7808                                 chunk_rec->objectid,
7809                                 chunk_rec->type,
7810                                 chunk_rec->offset,
7811                                 chunk_rec->length,
7812                                 chunk_rec->offset,
7813                                 chunk_rec->type_flags);
7814                 ret = 1;
7815         }
7816
7817         if (metadump_v2)
7818                 return ret;
7819
7820         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7821                                     chunk_rec->num_stripes);
7822         for (i = 0; i < chunk_rec->num_stripes; ++i) {
7823                 devid = chunk_rec->stripes[i].devid;
7824                 offset = chunk_rec->stripes[i].offset;
7825                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7826                                                        devid, offset, length);
7827                 if (dev_extent_item) {
7828                         dev_extent_rec = container_of(dev_extent_item,
7829                                                 struct device_extent_record,
7830                                                 cache);
7831                         if (dev_extent_rec->objectid != devid ||
7832                             dev_extent_rec->offset != offset ||
7833                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
7834                             dev_extent_rec->length != length) {
7835                                 if (!silent)
7836                                         fprintf(stderr,
7837                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7838                                                 chunk_rec->objectid,
7839                                                 chunk_rec->type,
7840                                                 chunk_rec->offset,
7841                                                 chunk_rec->stripes[i].devid,
7842                                                 chunk_rec->stripes[i].offset,
7843                                                 dev_extent_rec->objectid,
7844                                                 dev_extent_rec->offset,
7845                                                 dev_extent_rec->length);
7846                                 ret = -1;
7847                         } else {
7848                                 list_move(&dev_extent_rec->chunk_list,
7849                                           &chunk_rec->dextents);
7850                         }
7851                 } else {
7852                         if (!silent)
7853                                 fprintf(stderr,
7854                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7855                                         chunk_rec->objectid,
7856                                         chunk_rec->type,
7857                                         chunk_rec->offset,
7858                                         chunk_rec->stripes[i].devid,
7859                                         chunk_rec->stripes[i].offset);
7860                         ret = -1;
7861                 }
7862         }
7863         return ret;
7864 }
7865
7866 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7867 int check_chunks(struct cache_tree *chunk_cache,
7868                  struct block_group_tree *block_group_cache,
7869                  struct device_extent_tree *dev_extent_cache,
7870                  struct list_head *good, struct list_head *bad,
7871                  struct list_head *rebuild, int silent)
7872 {
7873         struct cache_extent *chunk_item;
7874         struct chunk_record *chunk_rec;
7875         struct block_group_record *bg_rec;
7876         struct device_extent_record *dext_rec;
7877         int err;
7878         int ret = 0;
7879
7880         chunk_item = first_cache_extent(chunk_cache);
7881         while (chunk_item) {
7882                 chunk_rec = container_of(chunk_item, struct chunk_record,
7883                                          cache);
7884                 err = check_chunk_refs(chunk_rec, block_group_cache,
7885                                        dev_extent_cache, silent);
7886                 if (err < 0)
7887                         ret = err;
7888                 if (err == 0 && good)
7889                         list_add_tail(&chunk_rec->list, good);
7890                 if (err > 0 && rebuild)
7891                         list_add_tail(&chunk_rec->list, rebuild);
7892                 if (err < 0 && bad)
7893                         list_add_tail(&chunk_rec->list, bad);
7894                 chunk_item = next_cache_extent(chunk_item);
7895         }
7896
7897         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7898                 if (!silent)
7899                         fprintf(stderr,
7900                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7901                                 bg_rec->objectid,
7902                                 bg_rec->offset,
7903                                 bg_rec->flags);
7904                 if (!ret)
7905                         ret = 1;
7906         }
7907
7908         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7909                             chunk_list) {
7910                 if (!silent)
7911                         fprintf(stderr,
7912                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7913                                 dext_rec->objectid,
7914                                 dext_rec->offset,
7915                                 dext_rec->length);
7916                 if (!ret)
7917                         ret = 1;
7918         }
7919         return ret;
7920 }
7921
7922
7923 static int check_device_used(struct device_record *dev_rec,
7924                              struct device_extent_tree *dext_cache)
7925 {
7926         struct cache_extent *cache;
7927         struct device_extent_record *dev_extent_rec;
7928         u64 total_byte = 0;
7929
7930         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7931         while (cache) {
7932                 dev_extent_rec = container_of(cache,
7933                                               struct device_extent_record,
7934                                               cache);
7935                 if (dev_extent_rec->objectid != dev_rec->devid)
7936                         break;
7937
7938                 list_del_init(&dev_extent_rec->device_list);
7939                 total_byte += dev_extent_rec->length;
7940                 cache = next_cache_extent(cache);
7941         }
7942
7943         if (total_byte != dev_rec->byte_used) {
7944                 fprintf(stderr,
7945                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7946                         total_byte, dev_rec->byte_used, dev_rec->objectid,
7947                         dev_rec->type, dev_rec->offset);
7948                 return -1;
7949         } else {
7950                 return 0;
7951         }
7952 }
7953
7954 /* check btrfs_dev_item -> btrfs_dev_extent */
7955 static int check_devices(struct rb_root *dev_cache,
7956                          struct device_extent_tree *dev_extent_cache)
7957 {
7958         struct rb_node *dev_node;
7959         struct device_record *dev_rec;
7960         struct device_extent_record *dext_rec;
7961         int err;
7962         int ret = 0;
7963
7964         dev_node = rb_first(dev_cache);
7965         while (dev_node) {
7966                 dev_rec = container_of(dev_node, struct device_record, node);
7967                 err = check_device_used(dev_rec, dev_extent_cache);
7968                 if (err)
7969                         ret = err;
7970
7971                 dev_node = rb_next(dev_node);
7972         }
7973         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7974                             device_list) {
7975                 fprintf(stderr,
7976                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7977                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
7978                 if (!ret)
7979                         ret = 1;
7980         }
7981         return ret;
7982 }
7983
7984 static int add_root_item_to_list(struct list_head *head,
7985                                   u64 objectid, u64 bytenr, u64 last_snapshot,
7986                                   u8 level, u8 drop_level,
7987                                   int level_size, struct btrfs_key *drop_key)
7988 {
7989
7990         struct root_item_record *ri_rec;
7991         ri_rec = malloc(sizeof(*ri_rec));
7992         if (!ri_rec)
7993                 return -ENOMEM;
7994         ri_rec->bytenr = bytenr;
7995         ri_rec->objectid = objectid;
7996         ri_rec->level = level;
7997         ri_rec->level_size = level_size;
7998         ri_rec->drop_level = drop_level;
7999         ri_rec->last_snapshot = last_snapshot;
8000         if (drop_key)
8001                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8002         list_add_tail(&ri_rec->list, head);
8003
8004         return 0;
8005 }
8006
8007 static void free_root_item_list(struct list_head *list)
8008 {
8009         struct root_item_record *ri_rec;
8010
8011         while (!list_empty(list)) {
8012                 ri_rec = list_first_entry(list, struct root_item_record,
8013                                           list);
8014                 list_del_init(&ri_rec->list);
8015                 free(ri_rec);
8016         }
8017 }
8018
8019 static int deal_root_from_list(struct list_head *list,
8020                                struct btrfs_root *root,
8021                                struct block_info *bits,
8022                                int bits_nr,
8023                                struct cache_tree *pending,
8024                                struct cache_tree *seen,
8025                                struct cache_tree *reada,
8026                                struct cache_tree *nodes,
8027                                struct cache_tree *extent_cache,
8028                                struct cache_tree *chunk_cache,
8029                                struct rb_root *dev_cache,
8030                                struct block_group_tree *block_group_cache,
8031                                struct device_extent_tree *dev_extent_cache)
8032 {
8033         int ret = 0;
8034         u64 last;
8035
8036         while (!list_empty(list)) {
8037                 struct root_item_record *rec;
8038                 struct extent_buffer *buf;
8039                 rec = list_entry(list->next,
8040                                  struct root_item_record, list);
8041                 last = 0;
8042                 buf = read_tree_block(root->fs_info->tree_root,
8043                                       rec->bytenr, rec->level_size, 0);
8044                 if (!extent_buffer_uptodate(buf)) {
8045                         free_extent_buffer(buf);
8046                         ret = -EIO;
8047                         break;
8048                 }
8049                 add_root_to_pending(buf, extent_cache, pending,
8050                                     seen, nodes, rec->objectid);
8051                 /*
8052                  * To rebuild extent tree, we need deal with snapshot
8053                  * one by one, otherwise we deal with node firstly which
8054                  * can maximize readahead.
8055                  */
8056                 while (1) {
8057                         ret = run_next_block(root, bits, bits_nr, &last,
8058                                              pending, seen, reada, nodes,
8059                                              extent_cache, chunk_cache,
8060                                              dev_cache, block_group_cache,
8061                                              dev_extent_cache, rec);
8062                         if (ret != 0)
8063                                 break;
8064                 }
8065                 free_extent_buffer(buf);
8066                 list_del(&rec->list);
8067                 free(rec);
8068                 if (ret < 0)
8069                         break;
8070         }
8071         while (ret >= 0) {
8072                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8073                                      reada, nodes, extent_cache, chunk_cache,
8074                                      dev_cache, block_group_cache,
8075                                      dev_extent_cache, NULL);
8076                 if (ret != 0) {
8077                         if (ret > 0)
8078                                 ret = 0;
8079                         break;
8080                 }
8081         }
8082         return ret;
8083 }
8084
8085 static int check_chunks_and_extents(struct btrfs_root *root)
8086 {
8087         struct rb_root dev_cache;
8088         struct cache_tree chunk_cache;
8089         struct block_group_tree block_group_cache;
8090         struct device_extent_tree dev_extent_cache;
8091         struct cache_tree extent_cache;
8092         struct cache_tree seen;
8093         struct cache_tree pending;
8094         struct cache_tree reada;
8095         struct cache_tree nodes;
8096         struct extent_io_tree excluded_extents;
8097         struct cache_tree corrupt_blocks;
8098         struct btrfs_path path;
8099         struct btrfs_key key;
8100         struct btrfs_key found_key;
8101         int ret, err = 0;
8102         struct block_info *bits;
8103         int bits_nr;
8104         struct extent_buffer *leaf;
8105         int slot;
8106         struct btrfs_root_item ri;
8107         struct list_head dropping_trees;
8108         struct list_head normal_trees;
8109         struct btrfs_root *root1;
8110         u64 objectid;
8111         u32 level_size;
8112         u8 level;
8113
8114         dev_cache = RB_ROOT;
8115         cache_tree_init(&chunk_cache);
8116         block_group_tree_init(&block_group_cache);
8117         device_extent_tree_init(&dev_extent_cache);
8118
8119         cache_tree_init(&extent_cache);
8120         cache_tree_init(&seen);
8121         cache_tree_init(&pending);
8122         cache_tree_init(&nodes);
8123         cache_tree_init(&reada);
8124         cache_tree_init(&corrupt_blocks);
8125         extent_io_tree_init(&excluded_extents);
8126         INIT_LIST_HEAD(&dropping_trees);
8127         INIT_LIST_HEAD(&normal_trees);
8128
8129         if (repair) {
8130                 root->fs_info->excluded_extents = &excluded_extents;
8131                 root->fs_info->fsck_extent_cache = &extent_cache;
8132                 root->fs_info->free_extent_hook = free_extent_hook;
8133                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8134         }
8135
8136         bits_nr = 1024;
8137         bits = malloc(bits_nr * sizeof(struct block_info));
8138         if (!bits) {
8139                 perror("malloc");
8140                 exit(1);
8141         }
8142
8143         if (ctx.progress_enabled) {
8144                 ctx.tp = TASK_EXTENTS;
8145                 task_start(ctx.info);
8146         }
8147
8148 again:
8149         root1 = root->fs_info->tree_root;
8150         level = btrfs_header_level(root1->node);
8151         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8152                                     root1->node->start, 0, level, 0,
8153                                     btrfs_level_size(root1, level), NULL);
8154         if (ret < 0)
8155                 goto out;
8156         root1 = root->fs_info->chunk_root;
8157         level = btrfs_header_level(root1->node);
8158         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8159                                     root1->node->start, 0, level, 0,
8160                                     btrfs_level_size(root1, level), NULL);
8161         if (ret < 0)
8162                 goto out;
8163         btrfs_init_path(&path);
8164         key.offset = 0;
8165         key.objectid = 0;
8166         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8167         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8168                                         &key, &path, 0, 0);
8169         if (ret < 0)
8170                 goto out;
8171         while(1) {
8172                 leaf = path.nodes[0];
8173                 slot = path.slots[0];
8174                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8175                         ret = btrfs_next_leaf(root, &path);
8176                         if (ret != 0)
8177                                 break;
8178                         leaf = path.nodes[0];
8179                         slot = path.slots[0];
8180                 }
8181                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8182                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8183                         unsigned long offset;
8184                         u64 last_snapshot;
8185
8186                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8187                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8188                         last_snapshot = btrfs_root_last_snapshot(&ri);
8189                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8190                                 level = btrfs_root_level(&ri);
8191                                 level_size = btrfs_level_size(root, level);
8192                                 ret = add_root_item_to_list(&normal_trees,
8193                                                 found_key.objectid,
8194                                                 btrfs_root_bytenr(&ri),
8195                                                 last_snapshot, level,
8196                                                 0, level_size, NULL);
8197                                 if (ret < 0)
8198                                         goto out;
8199                         } else {
8200                                 level = btrfs_root_level(&ri);
8201                                 level_size = btrfs_level_size(root, level);
8202                                 objectid = found_key.objectid;
8203                                 btrfs_disk_key_to_cpu(&found_key,
8204                                                       &ri.drop_progress);
8205                                 ret = add_root_item_to_list(&dropping_trees,
8206                                                 objectid,
8207                                                 btrfs_root_bytenr(&ri),
8208                                                 last_snapshot, level,
8209                                                 ri.drop_level,
8210                                                 level_size, &found_key);
8211                                 if (ret < 0)
8212                                         goto out;
8213                         }
8214                 }
8215                 path.slots[0]++;
8216         }
8217         btrfs_release_path(&path);
8218
8219         /*
8220          * check_block can return -EAGAIN if it fixes something, please keep
8221          * this in mind when dealing with return values from these functions, if
8222          * we get -EAGAIN we want to fall through and restart the loop.
8223          */
8224         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8225                                   &seen, &reada, &nodes, &extent_cache,
8226                                   &chunk_cache, &dev_cache, &block_group_cache,
8227                                   &dev_extent_cache);
8228         if (ret < 0) {
8229                 if (ret == -EAGAIN)
8230                         goto loop;
8231                 goto out;
8232         }
8233         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8234                                   &pending, &seen, &reada, &nodes,
8235                                   &extent_cache, &chunk_cache, &dev_cache,
8236                                   &block_group_cache, &dev_extent_cache);
8237         if (ret < 0) {
8238                 if (ret == -EAGAIN)
8239                         goto loop;
8240                 goto out;
8241         }
8242
8243         ret = check_chunks(&chunk_cache, &block_group_cache,
8244                            &dev_extent_cache, NULL, NULL, NULL, 0);
8245         if (ret) {
8246                 if (ret == -EAGAIN)
8247                         goto loop;
8248                 err = ret;
8249         }
8250
8251         ret = check_extent_refs(root, &extent_cache);
8252         if (ret < 0) {
8253                 if (ret == -EAGAIN)
8254                         goto loop;
8255                 goto out;
8256         }
8257
8258         ret = check_devices(&dev_cache, &dev_extent_cache);
8259         if (ret && err)
8260                 ret = err;
8261
8262 out:
8263         task_stop(ctx.info);
8264         if (repair) {
8265                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8266                 extent_io_tree_cleanup(&excluded_extents);
8267                 root->fs_info->fsck_extent_cache = NULL;
8268                 root->fs_info->free_extent_hook = NULL;
8269                 root->fs_info->corrupt_blocks = NULL;
8270                 root->fs_info->excluded_extents = NULL;
8271         }
8272         free(bits);
8273         free_chunk_cache_tree(&chunk_cache);
8274         free_device_cache_tree(&dev_cache);
8275         free_block_group_tree(&block_group_cache);
8276         free_device_extent_tree(&dev_extent_cache);
8277         free_extent_cache_tree(&seen);
8278         free_extent_cache_tree(&pending);
8279         free_extent_cache_tree(&reada);
8280         free_extent_cache_tree(&nodes);
8281         return ret;
8282 loop:
8283         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8284         free_extent_cache_tree(&seen);
8285         free_extent_cache_tree(&pending);
8286         free_extent_cache_tree(&reada);
8287         free_extent_cache_tree(&nodes);
8288         free_chunk_cache_tree(&chunk_cache);
8289         free_block_group_tree(&block_group_cache);
8290         free_device_cache_tree(&dev_cache);
8291         free_device_extent_tree(&dev_extent_cache);
8292         free_extent_record_cache(root->fs_info, &extent_cache);
8293         free_root_item_list(&normal_trees);
8294         free_root_item_list(&dropping_trees);
8295         extent_io_tree_cleanup(&excluded_extents);
8296         goto again;
8297 }
8298
8299 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8300                            struct btrfs_root *root, int overwrite)
8301 {
8302         struct extent_buffer *c;
8303         struct extent_buffer *old = root->node;
8304         int level;
8305         int ret;
8306         struct btrfs_disk_key disk_key = {0,0,0};
8307
8308         level = 0;
8309
8310         if (overwrite) {
8311                 c = old;
8312                 extent_buffer_get(c);
8313                 goto init;
8314         }
8315         c = btrfs_alloc_free_block(trans, root,
8316                                    btrfs_level_size(root, 0),
8317                                    root->root_key.objectid,
8318                                    &disk_key, level, 0, 0);
8319         if (IS_ERR(c)) {
8320                 c = old;
8321                 extent_buffer_get(c);
8322                 overwrite = 1;
8323         }
8324 init:
8325         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8326         btrfs_set_header_level(c, level);
8327         btrfs_set_header_bytenr(c, c->start);
8328         btrfs_set_header_generation(c, trans->transid);
8329         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8330         btrfs_set_header_owner(c, root->root_key.objectid);
8331
8332         write_extent_buffer(c, root->fs_info->fsid,
8333                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8334
8335         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8336                             btrfs_header_chunk_tree_uuid(c),
8337                             BTRFS_UUID_SIZE);
8338
8339         btrfs_mark_buffer_dirty(c);
8340         /*
8341          * this case can happen in the following case:
8342          *
8343          * 1.overwrite previous root.
8344          *
8345          * 2.reinit reloc data root, this is because we skip pin
8346          * down reloc data tree before which means we can allocate
8347          * same block bytenr here.
8348          */
8349         if (old->start == c->start) {
8350                 btrfs_set_root_generation(&root->root_item,
8351                                           trans->transid);
8352                 root->root_item.level = btrfs_header_level(root->node);
8353                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8354                                         &root->root_key, &root->root_item);
8355                 if (ret) {
8356                         free_extent_buffer(c);
8357                         return ret;
8358                 }
8359         }
8360         free_extent_buffer(old);
8361         root->node = c;
8362         add_root_to_dirty_list(root);
8363         return 0;
8364 }
8365
8366 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8367                                 struct extent_buffer *eb, int tree_root)
8368 {
8369         struct extent_buffer *tmp;
8370         struct btrfs_root_item *ri;
8371         struct btrfs_key key;
8372         u64 bytenr;
8373         u32 leafsize;
8374         int level = btrfs_header_level(eb);
8375         int nritems;
8376         int ret;
8377         int i;
8378
8379         /*
8380          * If we have pinned this block before, don't pin it again.
8381          * This can not only avoid forever loop with broken filesystem
8382          * but also give us some speedups.
8383          */
8384         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8385                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8386                 return 0;
8387
8388         btrfs_pin_extent(fs_info, eb->start, eb->len);
8389
8390         leafsize = btrfs_super_leafsize(fs_info->super_copy);
8391         nritems = btrfs_header_nritems(eb);
8392         for (i = 0; i < nritems; i++) {
8393                 if (level == 0) {
8394                         btrfs_item_key_to_cpu(eb, &key, i);
8395                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8396                                 continue;
8397                         /* Skip the extent root and reloc roots */
8398                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8399                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8400                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8401                                 continue;
8402                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8403                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8404
8405                         /*
8406                          * If at any point we start needing the real root we
8407                          * will have to build a stump root for the root we are
8408                          * in, but for now this doesn't actually use the root so
8409                          * just pass in extent_root.
8410                          */
8411                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8412                                               leafsize, 0);
8413                         if (!extent_buffer_uptodate(tmp)) {
8414                                 fprintf(stderr, "Error reading root block\n");
8415                                 return -EIO;
8416                         }
8417                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8418                         free_extent_buffer(tmp);
8419                         if (ret)
8420                                 return ret;
8421                 } else {
8422                         bytenr = btrfs_node_blockptr(eb, i);
8423
8424                         /* If we aren't the tree root don't read the block */
8425                         if (level == 1 && !tree_root) {
8426                                 btrfs_pin_extent(fs_info, bytenr, leafsize);
8427                                 continue;
8428                         }
8429
8430                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8431                                               leafsize, 0);
8432                         if (!extent_buffer_uptodate(tmp)) {
8433                                 fprintf(stderr, "Error reading tree block\n");
8434                                 return -EIO;
8435                         }
8436                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8437                         free_extent_buffer(tmp);
8438                         if (ret)
8439                                 return ret;
8440                 }
8441         }
8442
8443         return 0;
8444 }
8445
8446 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8447 {
8448         int ret;
8449
8450         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8451         if (ret)
8452                 return ret;
8453
8454         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8455 }
8456
8457 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8458 {
8459         struct btrfs_block_group_cache *cache;
8460         struct btrfs_path *path;
8461         struct extent_buffer *leaf;
8462         struct btrfs_chunk *chunk;
8463         struct btrfs_key key;
8464         int ret;
8465         u64 start;
8466
8467         path = btrfs_alloc_path();
8468         if (!path)
8469                 return -ENOMEM;
8470
8471         key.objectid = 0;
8472         key.type = BTRFS_CHUNK_ITEM_KEY;
8473         key.offset = 0;
8474
8475         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8476         if (ret < 0) {
8477                 btrfs_free_path(path);
8478                 return ret;
8479         }
8480
8481         /*
8482          * We do this in case the block groups were screwed up and had alloc
8483          * bits that aren't actually set on the chunks.  This happens with
8484          * restored images every time and could happen in real life I guess.
8485          */
8486         fs_info->avail_data_alloc_bits = 0;
8487         fs_info->avail_metadata_alloc_bits = 0;
8488         fs_info->avail_system_alloc_bits = 0;
8489
8490         /* First we need to create the in-memory block groups */
8491         while (1) {
8492                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8493                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
8494                         if (ret < 0) {
8495                                 btrfs_free_path(path);
8496                                 return ret;
8497                         }
8498                         if (ret) {
8499                                 ret = 0;
8500                                 break;
8501                         }
8502                 }
8503                 leaf = path->nodes[0];
8504                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8505                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8506                         path->slots[0]++;
8507                         continue;
8508                 }
8509
8510                 chunk = btrfs_item_ptr(leaf, path->slots[0],
8511                                        struct btrfs_chunk);
8512                 btrfs_add_block_group(fs_info, 0,
8513                                       btrfs_chunk_type(leaf, chunk),
8514                                       key.objectid, key.offset,
8515                                       btrfs_chunk_length(leaf, chunk));
8516                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8517                                  key.offset + btrfs_chunk_length(leaf, chunk),
8518                                  GFP_NOFS);
8519                 path->slots[0]++;
8520         }
8521         start = 0;
8522         while (1) {
8523                 cache = btrfs_lookup_first_block_group(fs_info, start);
8524                 if (!cache)
8525                         break;
8526                 cache->cached = 1;
8527                 start = cache->key.objectid + cache->key.offset;
8528         }
8529
8530         btrfs_free_path(path);
8531         return 0;
8532 }
8533
8534 static int reset_balance(struct btrfs_trans_handle *trans,
8535                          struct btrfs_fs_info *fs_info)
8536 {
8537         struct btrfs_root *root = fs_info->tree_root;
8538         struct btrfs_path *path;
8539         struct extent_buffer *leaf;
8540         struct btrfs_key key;
8541         int del_slot, del_nr = 0;
8542         int ret;
8543         int found = 0;
8544
8545         path = btrfs_alloc_path();
8546         if (!path)
8547                 return -ENOMEM;
8548
8549         key.objectid = BTRFS_BALANCE_OBJECTID;
8550         key.type = BTRFS_BALANCE_ITEM_KEY;
8551         key.offset = 0;
8552
8553         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8554         if (ret) {
8555                 if (ret > 0)
8556                         ret = 0;
8557                 if (!ret)
8558                         goto reinit_data_reloc;
8559                 else
8560                         goto out;
8561         }
8562
8563         ret = btrfs_del_item(trans, root, path);
8564         if (ret)
8565                 goto out;
8566         btrfs_release_path(path);
8567
8568         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8569         key.type = BTRFS_ROOT_ITEM_KEY;
8570         key.offset = 0;
8571
8572         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8573         if (ret < 0)
8574                 goto out;
8575         while (1) {
8576                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8577                         if (!found)
8578                                 break;
8579
8580                         if (del_nr) {
8581                                 ret = btrfs_del_items(trans, root, path,
8582                                                       del_slot, del_nr);
8583                                 del_nr = 0;
8584                                 if (ret)
8585                                         goto out;
8586                         }
8587                         key.offset++;
8588                         btrfs_release_path(path);
8589
8590                         found = 0;
8591                         ret = btrfs_search_slot(trans, root, &key, path,
8592                                                 -1, 1);
8593                         if (ret < 0)
8594                                 goto out;
8595                         continue;
8596                 }
8597                 found = 1;
8598                 leaf = path->nodes[0];
8599                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8600                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8601                         break;
8602                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8603                         path->slots[0]++;
8604                         continue;
8605                 }
8606                 if (!del_nr) {
8607                         del_slot = path->slots[0];
8608                         del_nr = 1;
8609                 } else {
8610                         del_nr++;
8611                 }
8612                 path->slots[0]++;
8613         }
8614
8615         if (del_nr) {
8616                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8617                 if (ret)
8618                         goto out;
8619         }
8620         btrfs_release_path(path);
8621
8622 reinit_data_reloc:
8623         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8624         key.type = BTRFS_ROOT_ITEM_KEY;
8625         key.offset = (u64)-1;
8626         root = btrfs_read_fs_root(fs_info, &key);
8627         if (IS_ERR(root)) {
8628                 fprintf(stderr, "Error reading data reloc tree\n");
8629                 ret = PTR_ERR(root);
8630                 goto out;
8631         }
8632         record_root_in_trans(trans, root);
8633         ret = btrfs_fsck_reinit_root(trans, root, 0);
8634         if (ret)
8635                 goto out;
8636         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8637 out:
8638         btrfs_free_path(path);
8639         return ret;
8640 }
8641
8642 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8643                               struct btrfs_fs_info *fs_info)
8644 {
8645         u64 start = 0;
8646         int ret;
8647
8648         /*
8649          * The only reason we don't do this is because right now we're just
8650          * walking the trees we find and pinning down their bytes, we don't look
8651          * at any of the leaves.  In order to do mixed groups we'd have to check
8652          * the leaves of any fs roots and pin down the bytes for any file
8653          * extents we find.  Not hard but why do it if we don't have to?
8654          */
8655         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8656                 fprintf(stderr, "We don't support re-initing the extent tree "
8657                         "for mixed block groups yet, please notify a btrfs "
8658                         "developer you want to do this so they can add this "
8659                         "functionality.\n");
8660                 return -EINVAL;
8661         }
8662
8663         /*
8664          * first we need to walk all of the trees except the extent tree and pin
8665          * down the bytes that are in use so we don't overwrite any existing
8666          * metadata.
8667          */
8668         ret = pin_metadata_blocks(fs_info);
8669         if (ret) {
8670                 fprintf(stderr, "error pinning down used bytes\n");
8671                 return ret;
8672         }
8673
8674         /*
8675          * Need to drop all the block groups since we're going to recreate all
8676          * of them again.
8677          */
8678         btrfs_free_block_groups(fs_info);
8679         ret = reset_block_groups(fs_info);
8680         if (ret) {
8681                 fprintf(stderr, "error resetting the block groups\n");
8682                 return ret;
8683         }
8684
8685         /* Ok we can allocate now, reinit the extent root */
8686         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8687         if (ret) {
8688                 fprintf(stderr, "extent root initialization failed\n");
8689                 /*
8690                  * When the transaction code is updated we should end the
8691                  * transaction, but for now progs only knows about commit so
8692                  * just return an error.
8693                  */
8694                 return ret;
8695         }
8696
8697         /*
8698          * Now we have all the in-memory block groups setup so we can make
8699          * allocations properly, and the metadata we care about is safe since we
8700          * pinned all of it above.
8701          */
8702         while (1) {
8703                 struct btrfs_block_group_cache *cache;
8704
8705                 cache = btrfs_lookup_first_block_group(fs_info, start);
8706                 if (!cache)
8707                         break;
8708                 start = cache->key.objectid + cache->key.offset;
8709                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8710                                         &cache->key, &cache->item,
8711                                         sizeof(cache->item));
8712                 if (ret) {
8713                         fprintf(stderr, "Error adding block group\n");
8714                         return ret;
8715                 }
8716                 btrfs_extent_post_op(trans, fs_info->extent_root);
8717         }
8718
8719         ret = reset_balance(trans, fs_info);
8720         if (ret)
8721                 fprintf(stderr, "error reseting the pending balance\n");
8722
8723         return ret;
8724 }
8725
8726 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8727 {
8728         struct btrfs_path *path;
8729         struct btrfs_trans_handle *trans;
8730         struct btrfs_key key;
8731         int ret;
8732
8733         printf("Recowing metadata block %llu\n", eb->start);
8734         key.objectid = btrfs_header_owner(eb);
8735         key.type = BTRFS_ROOT_ITEM_KEY;
8736         key.offset = (u64)-1;
8737
8738         root = btrfs_read_fs_root(root->fs_info, &key);
8739         if (IS_ERR(root)) {
8740                 fprintf(stderr, "Couldn't find owner root %llu\n",
8741                         key.objectid);
8742                 return PTR_ERR(root);
8743         }
8744
8745         path = btrfs_alloc_path();
8746         if (!path)
8747                 return -ENOMEM;
8748
8749         trans = btrfs_start_transaction(root, 1);
8750         if (IS_ERR(trans)) {
8751                 btrfs_free_path(path);
8752                 return PTR_ERR(trans);
8753         }
8754
8755         path->lowest_level = btrfs_header_level(eb);
8756         if (path->lowest_level)
8757                 btrfs_node_key_to_cpu(eb, &key, 0);
8758         else
8759                 btrfs_item_key_to_cpu(eb, &key, 0);
8760
8761         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8762         btrfs_commit_transaction(trans, root);
8763         btrfs_free_path(path);
8764         return ret;
8765 }
8766
8767 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8768 {
8769         struct btrfs_path *path;
8770         struct btrfs_trans_handle *trans;
8771         struct btrfs_key key;
8772         int ret;
8773
8774         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8775                bad->key.type, bad->key.offset);
8776         key.objectid = bad->root_id;
8777         key.type = BTRFS_ROOT_ITEM_KEY;
8778         key.offset = (u64)-1;
8779
8780         root = btrfs_read_fs_root(root->fs_info, &key);
8781         if (IS_ERR(root)) {
8782                 fprintf(stderr, "Couldn't find owner root %llu\n",
8783                         key.objectid);
8784                 return PTR_ERR(root);
8785         }
8786
8787         path = btrfs_alloc_path();
8788         if (!path)
8789                 return -ENOMEM;
8790
8791         trans = btrfs_start_transaction(root, 1);
8792         if (IS_ERR(trans)) {
8793                 btrfs_free_path(path);
8794                 return PTR_ERR(trans);
8795         }
8796
8797         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8798         if (ret) {
8799                 if (ret > 0)
8800                         ret = 0;
8801                 goto out;
8802         }
8803         ret = btrfs_del_item(trans, root, path);
8804 out:
8805         btrfs_commit_transaction(trans, root);
8806         btrfs_free_path(path);
8807         return ret;
8808 }
8809
8810 static int zero_log_tree(struct btrfs_root *root)
8811 {
8812         struct btrfs_trans_handle *trans;
8813         int ret;
8814
8815         trans = btrfs_start_transaction(root, 1);
8816         if (IS_ERR(trans)) {
8817                 ret = PTR_ERR(trans);
8818                 return ret;
8819         }
8820         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8821         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8822         ret = btrfs_commit_transaction(trans, root);
8823         return ret;
8824 }
8825
8826 static int populate_csum(struct btrfs_trans_handle *trans,
8827                          struct btrfs_root *csum_root, char *buf, u64 start,
8828                          u64 len)
8829 {
8830         u64 offset = 0;
8831         u64 sectorsize;
8832         int ret = 0;
8833
8834         while (offset < len) {
8835                 sectorsize = csum_root->sectorsize;
8836                 ret = read_extent_data(csum_root, buf, start + offset,
8837                                        &sectorsize, 0);
8838                 if (ret)
8839                         break;
8840                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8841                                             start + offset, buf, sectorsize);
8842                 if (ret)
8843                         break;
8844                 offset += sectorsize;
8845         }
8846         return ret;
8847 }
8848
8849 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8850                                       struct btrfs_root *csum_root,
8851                                       struct btrfs_root *cur_root)
8852 {
8853         struct btrfs_path *path;
8854         struct btrfs_key key;
8855         struct extent_buffer *node;
8856         struct btrfs_file_extent_item *fi;
8857         char *buf = NULL;
8858         u64 start = 0;
8859         u64 len = 0;
8860         int slot = 0;
8861         int ret = 0;
8862
8863         path = btrfs_alloc_path();
8864         if (!path)
8865                 return -ENOMEM;
8866         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8867         if (!buf) {
8868                 ret = -ENOMEM;
8869                 goto out;
8870         }
8871
8872         key.objectid = 0;
8873         key.offset = 0;
8874         key.type = 0;
8875
8876         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8877         if (ret < 0)
8878                 goto out;
8879         /* Iterate all regular file extents and fill its csum */
8880         while (1) {
8881                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8882
8883                 if (key.type != BTRFS_EXTENT_DATA_KEY)
8884                         goto next;
8885                 node = path->nodes[0];
8886                 slot = path->slots[0];
8887                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8888                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8889                         goto next;
8890                 start = btrfs_file_extent_disk_bytenr(node, fi);
8891                 len = btrfs_file_extent_disk_num_bytes(node, fi);
8892
8893                 ret = populate_csum(trans, csum_root, buf, start, len);
8894                 if (ret == -EEXIST)
8895                         ret = 0;
8896                 if (ret < 0)
8897                         goto out;
8898 next:
8899                 /*
8900                  * TODO: if next leaf is corrupted, jump to nearest next valid
8901                  * leaf.
8902                  */
8903                 ret = btrfs_next_item(cur_root, path);
8904                 if (ret < 0)
8905                         goto out;
8906                 if (ret > 0) {
8907                         ret = 0;
8908                         goto out;
8909                 }
8910         }
8911
8912 out:
8913         btrfs_free_path(path);
8914         free(buf);
8915         return ret;
8916 }
8917
8918 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8919                                   struct btrfs_root *csum_root)
8920 {
8921         struct btrfs_fs_info *fs_info = csum_root->fs_info;
8922         struct btrfs_path *path;
8923         struct btrfs_root *tree_root = fs_info->tree_root;
8924         struct btrfs_root *cur_root;
8925         struct extent_buffer *node;
8926         struct btrfs_key key;
8927         int slot = 0;
8928         int ret = 0;
8929
8930         path = btrfs_alloc_path();
8931         if (!path)
8932                 return -ENOMEM;
8933
8934         key.objectid = BTRFS_FS_TREE_OBJECTID;
8935         key.offset = 0;
8936         key.type = BTRFS_ROOT_ITEM_KEY;
8937
8938         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
8939         if (ret < 0)
8940                 goto out;
8941         if (ret > 0) {
8942                 ret = -ENOENT;
8943                 goto out;
8944         }
8945
8946         while (1) {
8947                 node = path->nodes[0];
8948                 slot = path->slots[0];
8949                 btrfs_item_key_to_cpu(node, &key, slot);
8950                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8951                         goto out;
8952                 if (key.type != BTRFS_ROOT_ITEM_KEY)
8953                         goto next;
8954                 if (!is_fstree(key.objectid))
8955                         goto next;
8956                 key.offset = (u64)-1;
8957
8958                 cur_root = btrfs_read_fs_root(fs_info, &key);
8959                 if (IS_ERR(cur_root) || !cur_root) {
8960                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8961                                 key.objectid);
8962                         goto out;
8963                 }
8964                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8965                                 cur_root);
8966                 if (ret < 0)
8967                         goto out;
8968 next:
8969                 ret = btrfs_next_item(tree_root, path);
8970                 if (ret > 0) {
8971                         ret = 0;
8972                         goto out;
8973                 }
8974                 if (ret < 0)
8975                         goto out;
8976         }
8977
8978 out:
8979         btrfs_free_path(path);
8980         return ret;
8981 }
8982
8983 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
8984                                       struct btrfs_root *csum_root)
8985 {
8986         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
8987         struct btrfs_path *path;
8988         struct btrfs_extent_item *ei;
8989         struct extent_buffer *leaf;
8990         char *buf;
8991         struct btrfs_key key;
8992         int ret;
8993
8994         path = btrfs_alloc_path();
8995         if (!path)
8996                 return -ENOMEM;
8997
8998         key.objectid = 0;
8999         key.type = BTRFS_EXTENT_ITEM_KEY;
9000         key.offset = 0;
9001
9002         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9003         if (ret < 0) {
9004                 btrfs_free_path(path);
9005                 return ret;
9006         }
9007
9008         buf = malloc(csum_root->sectorsize);
9009         if (!buf) {
9010                 btrfs_free_path(path);
9011                 return -ENOMEM;
9012         }
9013
9014         while (1) {
9015                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9016                         ret = btrfs_next_leaf(extent_root, path);
9017                         if (ret < 0)
9018                                 break;
9019                         if (ret) {
9020                                 ret = 0;
9021                                 break;
9022                         }
9023                 }
9024                 leaf = path->nodes[0];
9025
9026                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9027                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9028                         path->slots[0]++;
9029                         continue;
9030                 }
9031
9032                 ei = btrfs_item_ptr(leaf, path->slots[0],
9033                                     struct btrfs_extent_item);
9034                 if (!(btrfs_extent_flags(leaf, ei) &
9035                       BTRFS_EXTENT_FLAG_DATA)) {
9036                         path->slots[0]++;
9037                         continue;
9038                 }
9039
9040                 ret = populate_csum(trans, csum_root, buf, key.objectid,
9041                                     key.offset);
9042                 if (ret)
9043                         break;
9044                 path->slots[0]++;
9045         }
9046
9047         btrfs_free_path(path);
9048         free(buf);
9049         return ret;
9050 }
9051
9052 /*
9053  * Recalculate the csum and put it into the csum tree.
9054  *
9055  * Extent tree init will wipe out all the extent info, so in that case, we
9056  * can't depend on extent tree, but use fs tree.  If search_fs_tree is set, we
9057  * will use fs/subvol trees to init the csum tree.
9058  */
9059 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9060                           struct btrfs_root *csum_root,
9061                           int search_fs_tree)
9062 {
9063         if (search_fs_tree)
9064                 return fill_csum_tree_from_fs(trans, csum_root);
9065         else
9066                 return fill_csum_tree_from_extent(trans, csum_root);
9067 }
9068
9069 struct root_item_info {
9070         /* level of the root */
9071         u8 level;
9072         /* number of nodes at this level, must be 1 for a root */
9073         int node_count;
9074         u64 bytenr;
9075         u64 gen;
9076         struct cache_extent cache_extent;
9077 };
9078
9079 static struct cache_tree *roots_info_cache = NULL;
9080
9081 static void free_roots_info_cache(void)
9082 {
9083         if (!roots_info_cache)
9084                 return;
9085
9086         while (!cache_tree_empty(roots_info_cache)) {
9087                 struct cache_extent *entry;
9088                 struct root_item_info *rii;
9089
9090                 entry = first_cache_extent(roots_info_cache);
9091                 if (!entry)
9092                         break;
9093                 remove_cache_extent(roots_info_cache, entry);
9094                 rii = container_of(entry, struct root_item_info, cache_extent);
9095                 free(rii);
9096         }
9097
9098         free(roots_info_cache);
9099         roots_info_cache = NULL;
9100 }
9101
9102 static int build_roots_info_cache(struct btrfs_fs_info *info)
9103 {
9104         int ret = 0;
9105         struct btrfs_key key;
9106         struct extent_buffer *leaf;
9107         struct btrfs_path *path;
9108
9109         if (!roots_info_cache) {
9110                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9111                 if (!roots_info_cache)
9112                         return -ENOMEM;
9113                 cache_tree_init(roots_info_cache);
9114         }
9115
9116         path = btrfs_alloc_path();
9117         if (!path)
9118                 return -ENOMEM;
9119
9120         key.objectid = 0;
9121         key.type = BTRFS_EXTENT_ITEM_KEY;
9122         key.offset = 0;
9123
9124         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9125         if (ret < 0)
9126                 goto out;
9127         leaf = path->nodes[0];
9128
9129         while (1) {
9130                 struct btrfs_key found_key;
9131                 struct btrfs_extent_item *ei;
9132                 struct btrfs_extent_inline_ref *iref;
9133                 int slot = path->slots[0];
9134                 int type;
9135                 u64 flags;
9136                 u64 root_id;
9137                 u8 level;
9138                 struct cache_extent *entry;
9139                 struct root_item_info *rii;
9140
9141                 if (slot >= btrfs_header_nritems(leaf)) {
9142                         ret = btrfs_next_leaf(info->extent_root, path);
9143                         if (ret < 0) {
9144                                 break;
9145                         } else if (ret) {
9146                                 ret = 0;
9147                                 break;
9148                         }
9149                         leaf = path->nodes[0];
9150                         slot = path->slots[0];
9151                 }
9152
9153                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9154
9155                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9156                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9157                         goto next;
9158
9159                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9160                 flags = btrfs_extent_flags(leaf, ei);
9161
9162                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9163                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9164                         goto next;
9165
9166                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9167                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9168                         level = found_key.offset;
9169                 } else {
9170                         struct btrfs_tree_block_info *info;
9171
9172                         info = (struct btrfs_tree_block_info *)(ei + 1);
9173                         iref = (struct btrfs_extent_inline_ref *)(info + 1);
9174                         level = btrfs_tree_block_level(leaf, info);
9175                 }
9176
9177                 /*
9178                  * For a root extent, it must be of the following type and the
9179                  * first (and only one) iref in the item.
9180                  */
9181                 type = btrfs_extent_inline_ref_type(leaf, iref);
9182                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9183                         goto next;
9184
9185                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9186                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9187                 if (!entry) {
9188                         rii = malloc(sizeof(struct root_item_info));
9189                         if (!rii) {
9190                                 ret = -ENOMEM;
9191                                 goto out;
9192                         }
9193                         rii->cache_extent.start = root_id;
9194                         rii->cache_extent.size = 1;
9195                         rii->level = (u8)-1;
9196                         entry = &rii->cache_extent;
9197                         ret = insert_cache_extent(roots_info_cache, entry);
9198                         ASSERT(ret == 0);
9199                 } else {
9200                         rii = container_of(entry, struct root_item_info,
9201                                            cache_extent);
9202                 }
9203
9204                 ASSERT(rii->cache_extent.start == root_id);
9205                 ASSERT(rii->cache_extent.size == 1);
9206
9207                 if (level > rii->level || rii->level == (u8)-1) {
9208                         rii->level = level;
9209                         rii->bytenr = found_key.objectid;
9210                         rii->gen = btrfs_extent_generation(leaf, ei);
9211                         rii->node_count = 1;
9212                 } else if (level == rii->level) {
9213                         rii->node_count++;
9214                 }
9215 next:
9216                 path->slots[0]++;
9217         }
9218
9219 out:
9220         btrfs_free_path(path);
9221
9222         return ret;
9223 }
9224
9225 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9226                                   struct btrfs_path *path,
9227                                   const struct btrfs_key *root_key,
9228                                   const int read_only_mode)
9229 {
9230         const u64 root_id = root_key->objectid;
9231         struct cache_extent *entry;
9232         struct root_item_info *rii;
9233         struct btrfs_root_item ri;
9234         unsigned long offset;
9235
9236         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9237         if (!entry) {
9238                 fprintf(stderr,
9239                         "Error: could not find extent items for root %llu\n",
9240                         root_key->objectid);
9241                 return -ENOENT;
9242         }
9243
9244         rii = container_of(entry, struct root_item_info, cache_extent);
9245         ASSERT(rii->cache_extent.start == root_id);
9246         ASSERT(rii->cache_extent.size == 1);
9247
9248         if (rii->node_count != 1) {
9249                 fprintf(stderr,
9250                         "Error: could not find btree root extent for root %llu\n",
9251                         root_id);
9252                 return -ENOENT;
9253         }
9254
9255         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9256         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9257
9258         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9259             btrfs_root_level(&ri) != rii->level ||
9260             btrfs_root_generation(&ri) != rii->gen) {
9261
9262                 /*
9263                  * If we're in repair mode but our caller told us to not update
9264                  * the root item, i.e. just check if it needs to be updated, don't
9265                  * print this message, since the caller will call us again shortly
9266                  * for the same root item without read only mode (the caller will
9267                  * open a transaction first).
9268                  */
9269                 if (!(read_only_mode && repair))
9270                         fprintf(stderr,
9271                                 "%sroot item for root %llu,"
9272                                 " current bytenr %llu, current gen %llu, current level %u,"
9273                                 " new bytenr %llu, new gen %llu, new level %u\n",
9274                                 (read_only_mode ? "" : "fixing "),
9275                                 root_id,
9276                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9277                                 btrfs_root_level(&ri),
9278                                 rii->bytenr, rii->gen, rii->level);
9279
9280                 if (btrfs_root_generation(&ri) > rii->gen) {
9281                         fprintf(stderr,
9282                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9283                                 root_id, btrfs_root_generation(&ri), rii->gen);
9284                         return -EINVAL;
9285                 }
9286
9287                 if (!read_only_mode) {
9288                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9289                         btrfs_set_root_level(&ri, rii->level);
9290                         btrfs_set_root_generation(&ri, rii->gen);
9291                         write_extent_buffer(path->nodes[0], &ri,
9292                                             offset, sizeof(ri));
9293                 }
9294
9295                 return 1;
9296         }
9297
9298         return 0;
9299 }
9300
9301 /*
9302  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9303  * caused read-only snapshots to be corrupted if they were created at a moment
9304  * when the source subvolume/snapshot had orphan items. The issue was that the
9305  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9306  * node instead of the post orphan cleanup root node.
9307  * So this function, and its callees, just detects and fixes those cases. Even
9308  * though the regression was for read-only snapshots, this function applies to
9309  * any snapshot/subvolume root.
9310  * This must be run before any other repair code - not doing it so, makes other
9311  * repair code delete or modify backrefs in the extent tree for example, which
9312  * will result in an inconsistent fs after repairing the root items.
9313  */
9314 static int repair_root_items(struct btrfs_fs_info *info)
9315 {
9316         struct btrfs_path *path = NULL;
9317         struct btrfs_key key;
9318         struct extent_buffer *leaf;
9319         struct btrfs_trans_handle *trans = NULL;
9320         int ret = 0;
9321         int bad_roots = 0;
9322         int need_trans = 0;
9323
9324         ret = build_roots_info_cache(info);
9325         if (ret)
9326                 goto out;
9327
9328         path = btrfs_alloc_path();
9329         if (!path) {
9330                 ret = -ENOMEM;
9331                 goto out;
9332         }
9333
9334         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9335         key.type = BTRFS_ROOT_ITEM_KEY;
9336         key.offset = 0;
9337
9338 again:
9339         /*
9340          * Avoid opening and committing transactions if a leaf doesn't have
9341          * any root items that need to be fixed, so that we avoid rotating
9342          * backup roots unnecessarily.
9343          */
9344         if (need_trans) {
9345                 trans = btrfs_start_transaction(info->tree_root, 1);
9346                 if (IS_ERR(trans)) {
9347                         ret = PTR_ERR(trans);
9348                         goto out;
9349                 }
9350         }
9351
9352         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9353                                 0, trans ? 1 : 0);
9354         if (ret < 0)
9355                 goto out;
9356         leaf = path->nodes[0];
9357
9358         while (1) {
9359                 struct btrfs_key found_key;
9360
9361                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9362                         int no_more_keys = find_next_key(path, &key);
9363
9364                         btrfs_release_path(path);
9365                         if (trans) {
9366                                 ret = btrfs_commit_transaction(trans,
9367                                                                info->tree_root);
9368                                 trans = NULL;
9369                                 if (ret < 0)
9370                                         goto out;
9371                         }
9372                         need_trans = 0;
9373                         if (no_more_keys)
9374                                 break;
9375                         goto again;
9376                 }
9377
9378                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9379
9380                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9381                         goto next;
9382                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9383                         goto next;
9384
9385                 ret = maybe_repair_root_item(info, path, &found_key,
9386                                              trans ? 0 : 1);
9387                 if (ret < 0)
9388                         goto out;
9389                 if (ret) {
9390                         if (!trans && repair) {
9391                                 need_trans = 1;
9392                                 key = found_key;
9393                                 btrfs_release_path(path);
9394                                 goto again;
9395                         }
9396                         bad_roots++;
9397                 }
9398 next:
9399                 path->slots[0]++;
9400         }
9401         ret = 0;
9402 out:
9403         free_roots_info_cache();
9404         btrfs_free_path(path);
9405         if (trans)
9406                 btrfs_commit_transaction(trans, info->tree_root);
9407         if (ret < 0)
9408                 return ret;
9409
9410         return bad_roots;
9411 }
9412
9413 const char * const cmd_check_usage[] = {
9414         "btrfs check [options] <device>",
9415         "Check structural inegrity of a filesystem (unmounted).",
9416         "Check structural inegrity of an unmounted filesystem. Verify internal",
9417         "trees' consistency and item connectivity. In the repair mode try to",
9418         "fix the problems found.",
9419         "WARNING: the repair mode is considered dangerous",
9420         "",
9421         "-s|--super <superblock>     use this superblock copy",
9422         "-b|--backup                 use the backup root copy",
9423         "--repair                    try to repair the filesystem",
9424         "--readonly                  run in read-only mode (default)",
9425         "--init-csum-tree            create a new CRC tree",
9426         "--init-extent-tree          create a new extent tree",
9427         "--check-data-csum           verify checkums of data blocks",
9428         "-Q|--qgroup-report           print a report on qgroup consistency",
9429         "-E|--subvol-extents <subvolid>",
9430         "                            print subvolume extents and sharing state",
9431         "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
9432         "-p|--progress               indicate progress",
9433         NULL
9434 };
9435
9436 int cmd_check(int argc, char **argv)
9437 {
9438         struct cache_tree root_cache;
9439         struct btrfs_root *root;
9440         struct btrfs_fs_info *info;
9441         u64 bytenr = 0;
9442         u64 subvolid = 0;
9443         u64 tree_root_bytenr = 0;
9444         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9445         int ret;
9446         u64 num;
9447         int init_csum_tree = 0;
9448         int readonly = 0;
9449         int qgroup_report = 0;
9450         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9451
9452         while(1) {
9453                 int c;
9454                 enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT,
9455                         OPT_CHECK_CSUM, OPT_READONLY };
9456                 static const struct option long_options[] = {
9457                         { "super", required_argument, NULL, 's' },
9458                         { "repair", no_argument, NULL, OPT_REPAIR },
9459                         { "readonly", no_argument, NULL, OPT_READONLY },
9460                         { "init-csum-tree", no_argument, NULL, OPT_INIT_CSUM },
9461                         { "init-extent-tree", no_argument, NULL, OPT_INIT_EXTENT },
9462                         { "check-data-csum", no_argument, NULL, OPT_CHECK_CSUM },
9463                         { "backup", no_argument, NULL, 'b' },
9464                         { "subvol-extents", required_argument, NULL, 'E' },
9465                         { "qgroup-report", no_argument, NULL, 'Q' },
9466                         { "tree-root", required_argument, NULL, 'r' },
9467                         { "progress", no_argument, NULL, 'p' },
9468                         { NULL, 0, NULL, 0}
9469                 };
9470
9471                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9472                 if (c < 0)
9473                         break;
9474                 switch(c) {
9475                         case 'a': /* ignored */ break;
9476                         case 'b':
9477                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9478                                 break;
9479                         case 's':
9480                                 num = arg_strtou64(optarg);
9481                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9482                                         fprintf(stderr,
9483                                                 "ERROR: super mirror should be less than: %d\n",
9484                                                 BTRFS_SUPER_MIRROR_MAX);
9485                                         exit(1);
9486                                 }
9487                                 bytenr = btrfs_sb_offset(((int)num));
9488                                 printf("using SB copy %llu, bytenr %llu\n", num,
9489                                        (unsigned long long)bytenr);
9490                                 break;
9491                         case 'Q':
9492                                 qgroup_report = 1;
9493                                 break;
9494                         case 'E':
9495                                 subvolid = arg_strtou64(optarg);
9496                                 break;
9497                         case 'r':
9498                                 tree_root_bytenr = arg_strtou64(optarg);
9499                                 break;
9500                         case 'p':
9501                                 ctx.progress_enabled = true;
9502                                 break;
9503                         case '?':
9504                         case 'h':
9505                                 usage(cmd_check_usage);
9506                         case OPT_REPAIR:
9507                                 printf("enabling repair mode\n");
9508                                 repair = 1;
9509                                 ctree_flags |= OPEN_CTREE_WRITES;
9510                                 break;
9511                         case OPT_READONLY:
9512                                 readonly = 1;
9513                                 break;
9514                         case OPT_INIT_CSUM:
9515                                 printf("Creating a new CRC tree\n");
9516                                 init_csum_tree = 1;
9517                                 repair = 1;
9518                                 ctree_flags |= OPEN_CTREE_WRITES;
9519                                 break;
9520                         case OPT_INIT_EXTENT:
9521                                 init_extent_tree = 1;
9522                                 ctree_flags |= (OPEN_CTREE_WRITES |
9523                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9524                                 repair = 1;
9525                                 break;
9526                         case OPT_CHECK_CSUM:
9527                                 check_data_csum = 1;
9528                                 break;
9529                 }
9530         }
9531         argc = argc - optind;
9532
9533         if (check_argc_exact(argc, 1))
9534                 usage(cmd_check_usage);
9535
9536         if (ctx.progress_enabled) {
9537                 ctx.tp = TASK_NOTHING;
9538                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9539         }
9540
9541         /* This check is the only reason for --readonly to exist */
9542         if (readonly && repair) {
9543                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9544                 exit(1);
9545         }
9546
9547         radix_tree_init();
9548         cache_tree_init(&root_cache);
9549
9550         if((ret = check_mounted(argv[optind])) < 0) {
9551                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9552                 goto err_out;
9553         } else if(ret) {
9554                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9555                 ret = -EBUSY;
9556                 goto err_out;
9557         }
9558
9559         /* only allow partial opening under repair mode */
9560         if (repair)
9561                 ctree_flags |= OPEN_CTREE_PARTIAL;
9562
9563         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9564                                   ctree_flags);
9565         if (!info) {
9566                 fprintf(stderr, "Couldn't open file system\n");
9567                 ret = -EIO;
9568                 goto err_out;
9569         }
9570
9571         global_info = info;
9572         root = info->fs_root;
9573
9574         /*
9575          * repair mode will force us to commit transaction which
9576          * will make us fail to load log tree when mounting.
9577          */
9578         if (repair && btrfs_super_log_root(info->super_copy)) {
9579                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9580                 if (!ret) {
9581                         ret = 1;
9582                         goto close_out;
9583                 }
9584                 ret = zero_log_tree(root);
9585                 if (ret) {
9586                         fprintf(stderr, "fail to zero log tree\n");
9587                         goto close_out;
9588                 }
9589         }
9590
9591         uuid_unparse(info->super_copy->fsid, uuidbuf);
9592         if (qgroup_report) {
9593                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9594                        uuidbuf);
9595                 ret = qgroup_verify_all(info);
9596                 if (ret == 0)
9597                         print_qgroup_report(1);
9598                 goto close_out;
9599         }
9600         if (subvolid) {
9601                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9602                        subvolid, argv[optind], uuidbuf);
9603                 ret = print_extent_state(info, subvolid);
9604                 goto close_out;
9605         }
9606         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9607
9608         if (!extent_buffer_uptodate(info->tree_root->node) ||
9609             !extent_buffer_uptodate(info->dev_root->node) ||
9610             !extent_buffer_uptodate(info->chunk_root->node)) {
9611                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9612                 ret = -EIO;
9613                 goto close_out;
9614         }
9615
9616         if (init_extent_tree || init_csum_tree) {
9617                 struct btrfs_trans_handle *trans;
9618
9619                 trans = btrfs_start_transaction(info->extent_root, 0);
9620                 if (IS_ERR(trans)) {
9621                         fprintf(stderr, "Error starting transaction\n");
9622                         ret = PTR_ERR(trans);
9623                         goto close_out;
9624                 }
9625
9626                 if (init_extent_tree) {
9627                         printf("Creating a new extent tree\n");
9628                         ret = reinit_extent_tree(trans, info);
9629                         if (ret)
9630                                 goto close_out;
9631                 }
9632
9633                 if (init_csum_tree) {
9634                         fprintf(stderr, "Reinit crc root\n");
9635                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9636                         if (ret) {
9637                                 fprintf(stderr, "crc root initialization failed\n");
9638                                 ret = -EIO;
9639                                 goto close_out;
9640                         }
9641
9642                         ret = fill_csum_tree(trans, info->csum_root,
9643                                              init_extent_tree);
9644                         if (ret) {
9645                                 fprintf(stderr, "crc refilling failed\n");
9646                                 return -EIO;
9647                         }
9648                 }
9649                 /*
9650                  * Ok now we commit and run the normal fsck, which will add
9651                  * extent entries for all of the items it finds.
9652                  */
9653                 ret = btrfs_commit_transaction(trans, info->extent_root);
9654                 if (ret)
9655                         goto close_out;
9656         }
9657         if (!extent_buffer_uptodate(info->extent_root->node)) {
9658                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9659                 ret = -EIO;
9660                 goto close_out;
9661         }
9662         if (!extent_buffer_uptodate(info->csum_root->node)) {
9663                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9664                 ret = -EIO;
9665                 goto close_out;
9666         }
9667
9668         if (!ctx.progress_enabled)
9669                 fprintf(stderr, "checking extents\n");
9670         ret = check_chunks_and_extents(root);
9671         if (ret)
9672                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9673
9674         ret = repair_root_items(info);
9675         if (ret < 0)
9676                 goto close_out;
9677         if (repair) {
9678                 fprintf(stderr, "Fixed %d roots.\n", ret);
9679                 ret = 0;
9680         } else if (ret > 0) {
9681                 fprintf(stderr,
9682                        "Found %d roots with an outdated root item.\n",
9683                        ret);
9684                 fprintf(stderr,
9685                         "Please run a filesystem check with the option --repair to fix them.\n");
9686                 ret = 1;
9687                 goto close_out;
9688         }
9689
9690         if (!ctx.progress_enabled)
9691                 fprintf(stderr, "checking free space cache\n");
9692         ret = check_space_cache(root);
9693         if (ret)
9694                 goto out;
9695
9696         /*
9697          * We used to have to have these hole extents in between our real
9698          * extents so if we don't have this flag set we need to make sure there
9699          * are no gaps in the file extents for inodes, otherwise we can just
9700          * ignore it when this happens.
9701          */
9702         no_holes = btrfs_fs_incompat(root->fs_info,
9703                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9704         if (!ctx.progress_enabled)
9705                 fprintf(stderr, "checking fs roots\n");
9706         ret = check_fs_roots(root, &root_cache);
9707         if (ret)
9708                 goto out;
9709
9710         fprintf(stderr, "checking csums\n");
9711         ret = check_csums(root);
9712         if (ret)
9713                 goto out;
9714
9715         fprintf(stderr, "checking root refs\n");
9716         ret = check_root_refs(root, &root_cache);
9717         if (ret)
9718                 goto out;
9719
9720         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9721                 struct extent_buffer *eb;
9722
9723                 eb = list_first_entry(&root->fs_info->recow_ebs,
9724                                       struct extent_buffer, recow);
9725                 list_del_init(&eb->recow);
9726                 ret = recow_extent_buffer(root, eb);
9727                 if (ret)
9728                         break;
9729         }
9730
9731         while (!list_empty(&delete_items)) {
9732                 struct bad_item *bad;
9733
9734                 bad = list_first_entry(&delete_items, struct bad_item, list);
9735                 list_del_init(&bad->list);
9736                 if (repair)
9737                         ret = delete_bad_item(root, bad);
9738                 free(bad);
9739         }
9740
9741         if (info->quota_enabled) {
9742                 int err;
9743                 fprintf(stderr, "checking quota groups\n");
9744                 err = qgroup_verify_all(info);
9745                 if (err)
9746                         goto out;
9747         }
9748
9749         if (!list_empty(&root->fs_info->recow_ebs)) {
9750                 fprintf(stderr, "Transid errors in file system\n");
9751                 ret = 1;
9752         }
9753 out:
9754         print_qgroup_report(0);
9755         if (found_old_backref) { /*
9756                  * there was a disk format change when mixed
9757                  * backref was in testing tree. The old format
9758                  * existed about one week.
9759                  */
9760                 printf("\n * Found old mixed backref format. "
9761                        "The old format is not supported! *"
9762                        "\n * Please mount the FS in readonly mode, "
9763                        "backup data and re-format the FS. *\n\n");
9764                 ret = 1;
9765         }
9766         printf("found %llu bytes used err is %d\n",
9767                (unsigned long long)bytes_used, ret);
9768         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9769         printf("total tree bytes: %llu\n",
9770                (unsigned long long)total_btree_bytes);
9771         printf("total fs tree bytes: %llu\n",
9772                (unsigned long long)total_fs_tree_bytes);
9773         printf("total extent tree bytes: %llu\n",
9774                (unsigned long long)total_extent_tree_bytes);
9775         printf("btree space waste bytes: %llu\n",
9776                (unsigned long long)btree_space_waste);
9777         printf("file data blocks allocated: %llu\n referenced %llu\n",
9778                 (unsigned long long)data_bytes_allocated,
9779                 (unsigned long long)data_bytes_referenced);
9780
9781         free_root_recs_tree(&root_cache);
9782 close_out:
9783         close_ctree(root);
9784 err_out:
9785         if (ctx.progress_enabled)
9786                 task_deinit(ctx.info);
9787
9788         return ret;
9789 }