btrfs-progs: fix build of standalone utilities after clean
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
/*
 * Phases of the check that the progress indicator can report.
 *
 * The first three values are used as indexes into task_position_string[]
 * in print_status_check(), so both must be kept in the same order.
 * TASK_NOTHING has no message; print_status_check() returns early for it.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
51
/* State handed to the progress reporting callbacks. */
struct task_ctx {
        /* NOTE(review): presumably set when progress output is requested - confirm */
        int progress_enabled;
        /* Current phase; selects the message printed by print_status_check() */
        enum task_position tp;

        /* Handle passed to task_period_start() and task_period_wait() */
        struct task_info *info;
};
58
/*
 * File-scope state of the checker.
 * NOTE(review): most of these are only used outside this part of the file;
 * their exact meaning should be confirmed against the users.
 */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static int found_old_backref = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
static int repair = 0;
/* When non-zero, maybe_free_inode_rec() skips the file extent gap check */
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
static struct task_ctx ctx = { 0 };
76
77 static void *print_status_check(void *p)
78 {
79         struct task_ctx *priv = p;
80         const char work_indicator[] = { '.', 'o', 'O', 'o' };
81         uint32_t count = 0;
82         static char *task_position_string[] = {
83                 "checking extents",
84                 "checking free space cache",
85                 "checking fs roots",
86         };
87
88         task_period_start(priv->info, 1000 /* 1s */);
89
90         if (priv->tp == TASK_NOTHING)
91                 return NULL;
92
93         while (1) {
94                 printf("%s [%c]\r", task_position_string[priv->tp],
95                                 work_indicator[count % 4]);
96                 count++;
97                 fflush(stdout);
98                 task_period_wait(priv->info);
99         }
100         return NULL;
101 }
102
/*
 * Terminate the progress line: emit a newline on stdout and flush it.
 * @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
        putchar('\n');
        fflush(stdout);

        return 0;
}
110
/*
 * Common header of a back reference.  It is embedded as the first member
 * ("node") of both struct data_backref and struct tree_backref.
 * NOTE(review): the flag bits are not used in this part of the file; their
 * meaning should be confirmed against the users.
 */
struct extent_backref {
        struct list_head list;
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
119
/*
 * Back reference carrying data extent details on top of the common
 * extent_backref header.
 */
struct data_backref {
        struct extent_backref node;
        /*
         * parent and root share storage.
         * NOTE(review): presumably node.full_backref tells which one is
         * valid - confirm.
         */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        u32 found_ref;
};
134
/*
 * Similar to data_backref, but without the members that cannot be
 * determined, and linked through a plain list_head.
 * During the extent scan it is stored in root->orphan_data_extent.
 * During the fs tree scan it is moved to the orphan_extents list of the
 * owning inode record.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        /* Printed as the inode number by print_orphan_data_extents() */
        u64 objectid;
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
149
/*
 * Back reference consisting of the common extent_backref header plus a
 * single parent/root value.
 */
struct tree_backref {
        struct extent_backref node;
        /* parent and root share storage */
        union {
                u64 parent;
                u64 root;
        };
};
157
/*
 * Per-extent bookkeeping record.
 * NOTE(review): none of these fields are used in this part of the file;
 * presumably "backrefs" links extent_backref entries and "cache" indexes
 * the record in a cache_tree - confirm against the users.
 */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        int flag_block_full_backref;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
184
/*
 * One name referring to an inode, linked into inode_record.backrefs.
 *
 * The structure is variable sized: clone_inode_rec() copies
 * sizeof(struct inode_backref) + namelen + 1 bytes, so the name is stored
 * directly behind the fixed part.
 */
struct inode_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_inode_ref:1;
        /* Compared against imode_to_type() of the inode mode */
        unsigned int filetype:8;
        /* Bitmask of REF_ERR_* */
        int errors;
        unsigned int ref_type;
        u64 dir;
        u64 index;
        u16 namelen;
        char name[0];
};
198
/*
 * Record describing a root item.
 * NOTE(review): not used in this part of the file; field meanings are to
 * be confirmed against the users.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        int level_size;
        struct btrfs_key drop_key;
};
209
/*
 * Error bits for back references, stored in inode_backref.errors and
 * decoded into text by print_ref_error().
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
223
/*
 * A hole covering [start, start + len) of a file, kept in an rb-tree
 * (inode_record.holes) that is ordered by compare_hole().
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
229
230 /* Compatible function to allow reuse of old codes */
231 static u64 first_extent_gap(struct rb_root *holes)
232 {
233         struct file_extent_hole *hole;
234
235         if (RB_EMPTY_ROOT(holes))
236                 return (u64)-1;
237
238         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
239         return hole->start;
240 }
241
242 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
243 {
244         struct file_extent_hole *hole1;
245         struct file_extent_hole *hole2;
246
247         hole1 = rb_entry(node1, struct file_extent_hole, node);
248         hole2 = rb_entry(node2, struct file_extent_hole, node);
249
250         if (hole1->start > hole2->start)
251                 return -1;
252         if (hole1->start < hole2->start)
253                 return 1;
254         /* Now hole1->start == hole2->start */
255         if (hole1->len >= hole2->len)
256                 /*
257                  * Hole 1 will be merge center
258                  * Same hole will be merged later
259                  */
260                 return -1;
261         /* Hole 2 will be merge center */
262         return 1;
263 }
264
265 /*
266  * Add a hole to the record
267  *
268  * This will do hole merge for copy_file_extent_holes(),
269  * which will ensure there won't be continuous holes.
270  */
271 static int add_file_extent_hole(struct rb_root *holes,
272                                 u64 start, u64 len)
273 {
274         struct file_extent_hole *hole;
275         struct file_extent_hole *prev = NULL;
276         struct file_extent_hole *next = NULL;
277
278         hole = malloc(sizeof(*hole));
279         if (!hole)
280                 return -ENOMEM;
281         hole->start = start;
282         hole->len = len;
283         /* Since compare will not return 0, no -EEXIST will happen */
284         rb_insert(holes, &hole->node, compare_hole);
285
286         /* simple merge with previous hole */
287         if (rb_prev(&hole->node))
288                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
289                                 node);
290         if (prev && prev->start + prev->len >= hole->start) {
291                 hole->len = hole->start + hole->len - prev->start;
292                 hole->start = prev->start;
293                 rb_erase(&prev->node, holes);
294                 free(prev);
295                 prev = NULL;
296         }
297
298         /* iterate merge with next holes */
299         while (1) {
300                 if (!rb_next(&hole->node))
301                         break;
302                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
303                                         node);
304                 if (hole->start + hole->len >= next->start) {
305                         if (hole->start + hole->len <= next->start + next->len)
306                                 hole->len = next->start + next->len -
307                                             hole->start;
308                         rb_erase(&next->node, holes);
309                         free(next);
310                         next = NULL;
311                 } else
312                         break;
313         }
314         return 0;
315 }
316
317 static int compare_hole_range(struct rb_node *node, void *data)
318 {
319         struct file_extent_hole *hole;
320         u64 start;
321
322         hole = (struct file_extent_hole *)data;
323         start = hole->start;
324
325         hole = rb_entry(node, struct file_extent_hole, node);
326         if (start < hole->start)
327                 return -1;
328         if (start >= hole->start && start < hole->start + hole->len)
329                 return 0;
330         return 1;
331 }
332
333 /*
334  * Delete a hole in the record
335  *
336  * This will do the hole split and is much restrict than add.
337  */
338 static int del_file_extent_hole(struct rb_root *holes,
339                                 u64 start, u64 len)
340 {
341         struct file_extent_hole *hole;
342         struct file_extent_hole tmp;
343         u64 prev_start = 0;
344         u64 prev_len = 0;
345         u64 next_start = 0;
346         u64 next_len = 0;
347         struct rb_node *node;
348         int have_prev = 0;
349         int have_next = 0;
350         int ret = 0;
351
352         tmp.start = start;
353         tmp.len = len;
354         node = rb_search(holes, &tmp, compare_hole_range, NULL);
355         if (!node)
356                 return -EEXIST;
357         hole = rb_entry(node, struct file_extent_hole, node);
358         if (start + len > hole->start + hole->len)
359                 return -EEXIST;
360
361         /*
362          * Now there will be no overflap, delete the hole and re-add the
363          * split(s) if they exists.
364          */
365         if (start > hole->start) {
366                 prev_start = hole->start;
367                 prev_len = start - hole->start;
368                 have_prev = 1;
369         }
370         if (hole->start + hole->len > start + len) {
371                 next_start = start + len;
372                 next_len = hole->start + hole->len - start - len;
373                 have_next = 1;
374         }
375         rb_erase(node, holes);
376         free(hole);
377         if (have_prev) {
378                 ret = add_file_extent_hole(holes, prev_start, prev_len);
379                 if (ret < 0)
380                         return ret;
381         }
382         if (have_next) {
383                 ret = add_file_extent_hole(holes, next_start, next_len);
384                 if (ret < 0)
385                         return ret;
386         }
387         return 0;
388 }
389
390 static int copy_file_extent_holes(struct rb_root *dst,
391                                   struct rb_root *src)
392 {
393         struct file_extent_hole *hole;
394         struct rb_node *node;
395         int ret = 0;
396
397         node = rb_first(src);
398         while (node) {
399                 hole = rb_entry(node, struct file_extent_hole, node);
400                 ret = add_file_extent_hole(dst, hole->start, hole->len);
401                 if (ret)
402                         break;
403                 node = rb_next(node);
404         }
405         return ret;
406 }
407
408 static void free_file_extent_holes(struct rb_root *holes)
409 {
410         struct rb_node *node;
411         struct file_extent_hole *hole;
412
413         node = rb_first(holes);
414         while (node) {
415                 hole = rb_entry(node, struct file_extent_hole, node);
416                 rb_erase(node, holes);
417                 free(hole);
418                 node = rb_first(holes);
419         }
420 }
421
/*
 * Everything collected about one inode while walking a tree.
 *
 * Records are reference counted through "refs": get_inode_rec() clones a
 * shared record before it is modified and free_inode_rec() releases the
 * memory when the last reference is dropped.
 */
struct inode_record {
        /* List of struct inode_backref */
        struct list_head backrefs;
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        /* Set when the inode item carries BTRFS_INODE_NODATASUM */
        unsigned int nodatasum:1;
        /* Bitmask of I_ERR_* */
        int errors;

        u64 ino;
        /* nlink, imode, isize and nbytes are read from the inode item */
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        u32 found_link;
        u64 found_size;
        /* Initialized to (u64)-1 for a new record */
        u64 extent_start;
        u64 extent_end;
        /* rb-tree of struct file_extent_hole */
        struct rb_root holes;
        /* List of struct orphan_data_extent */
        struct list_head orphan_extents;

        u32 refs;
};
449
/*
 * Error bits for inodes, stored in inode_record.errors and decoded into
 * text by print_inode_error().
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
465
/*
 * Back reference of a root.
 * NOTE(review): not used in this part of the file.  The layout mirrors
 * struct inode_backref, so the name is presumably stored behind the
 * fixed part as well - confirm against the allocation site.
 */
struct root_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_back_ref:1;
        unsigned int found_forward_ref:1;
        unsigned int reachable:1;
        int errors;
        u64 ref_root;
        u64 dir;
        u64 index;
        u16 namelen;
        char name[0];
};
480
/*
 * Per-root record.
 * NOTE(review): not used in this part of the file; "backrefs" presumably
 * links struct root_backref entries - confirm.
 */
struct root_record {
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
488
/*
 * Cache tree entry that carries a pointer payload.  get_inode_rec() uses
 * it with cache.start set to the inode number, cache.size set to 1 and
 * data pointing to the struct inode_record.
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
493
/*
 * State attached to a tree node during the walk.
 * NOTE(review): only inode_cache and current are used in this part of the
 * file; the remaining members should be confirmed against their users.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        /* Inode records, looked up by inode number */
        struct cache_tree inode_cache;
        /* Record that process_inode_item() fills in */
        struct inode_record *current;
        u32 refs;
};
501
/* Start and size of a block.  NOTE(review): users are not in this part of the file. */
struct block_info {
        u64 start;
        u32 size;
};
506
/*
 * State of a tree walk, with one shared_node slot per tree level.
 * NOTE(review): users are not in this part of the file; active_node is
 * presumably an index into nodes[] - confirm.
 */
struct walk_control {
        struct cache_tree shared;
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        int active_node;
        int root_level;
};
513
/*
 * Key of an item together with the id of the root it belongs to, linkable
 * into a list.  NOTE(review): users are not in this part of the file.
 */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
519
/* Forward declaration so the function can be called before its definition */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
521
522 static void record_root_in_trans(struct btrfs_trans_handle *trans,
523                                  struct btrfs_root *root)
524 {
525         if (root->last_trans != trans->transid) {
526                 root->track_dirty = 1;
527                 root->last_trans = trans->transid;
528                 root->commit_root = root->node;
529                 extent_buffer_get(root->node);
530         }
531 }
532
533 static u8 imode_to_type(u32 imode)
534 {
535 #define S_SHIFT 12
536         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
537                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
538                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
539                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
540                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
541                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
542                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
543                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
544         };
545
546         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
547 #undef S_SHIFT
548 }
549
550 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
551 {
552         struct device_record *rec1;
553         struct device_record *rec2;
554
555         rec1 = rb_entry(node1, struct device_record, node);
556         rec2 = rb_entry(node2, struct device_record, node);
557         if (rec1->devid > rec2->devid)
558                 return -1;
559         else if (rec1->devid < rec2->devid)
560                 return 1;
561         else
562                 return 0;
563 }
564
565 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
566 {
567         struct inode_record *rec;
568         struct inode_backref *backref;
569         struct inode_backref *orig;
570         struct inode_backref *tmp;
571         struct orphan_data_extent *src_orphan;
572         struct orphan_data_extent *dst_orphan;
573         size_t size;
574         int ret;
575
576         rec = malloc(sizeof(*rec));
577         if (!rec)
578                 return ERR_PTR(-ENOMEM);
579         memcpy(rec, orig_rec, sizeof(*rec));
580         rec->refs = 1;
581         INIT_LIST_HEAD(&rec->backrefs);
582         INIT_LIST_HEAD(&rec->orphan_extents);
583         rec->holes = RB_ROOT;
584
585         list_for_each_entry(orig, &orig_rec->backrefs, list) {
586                 size = sizeof(*orig) + orig->namelen + 1;
587                 backref = malloc(size);
588                 if (!backref) {
589                         ret = -ENOMEM;
590                         goto cleanup;
591                 }
592                 memcpy(backref, orig, size);
593                 list_add_tail(&backref->list, &rec->backrefs);
594         }
595         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
596                 dst_orphan = malloc(sizeof(*dst_orphan));
597                 if (!dst_orphan) {
598                         ret = -ENOMEM;
599                         goto cleanup;
600                 }
601                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
602                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
603         }
604         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
605         BUG_ON(ret < 0);
606
607         return rec;
608
609 cleanup:
610         if (!list_empty(&rec->backrefs))
611                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
612                         list_del(&orig->list);
613                         free(orig);
614                 }
615
616         if (!list_empty(&rec->orphan_extents))
617                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
618                         list_del(&orig->list);
619                         free(orig);
620                 }
621
622         free(rec);
623
624         return ERR_PTR(ret);
625 }
626
627 static void print_orphan_data_extents(struct list_head *orphan_extents,
628                                       u64 objectid)
629 {
630         struct orphan_data_extent *orphan;
631
632         if (list_empty(orphan_extents))
633                 return;
634         printf("The following data extent is lost in tree %llu:\n",
635                objectid);
636         list_for_each_entry(orphan, orphan_extents, list) {
637                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
638                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
639                        orphan->disk_len);
640         }
641 }
642
643 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
644 {
645         u64 root_objectid = root->root_key.objectid;
646         int errors = rec->errors;
647
648         if (!errors)
649                 return;
650         /* reloc root errors, we print its corresponding fs root objectid*/
651         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
652                 root_objectid = root->root_key.offset;
653                 fprintf(stderr, "reloc");
654         }
655         fprintf(stderr, "root %llu inode %llu errors %x",
656                 (unsigned long long) root_objectid,
657                 (unsigned long long) rec->ino, rec->errors);
658
659         if (errors & I_ERR_NO_INODE_ITEM)
660                 fprintf(stderr, ", no inode item");
661         if (errors & I_ERR_NO_ORPHAN_ITEM)
662                 fprintf(stderr, ", no orphan item");
663         if (errors & I_ERR_DUP_INODE_ITEM)
664                 fprintf(stderr, ", dup inode item");
665         if (errors & I_ERR_DUP_DIR_INDEX)
666                 fprintf(stderr, ", dup dir index");
667         if (errors & I_ERR_ODD_DIR_ITEM)
668                 fprintf(stderr, ", odd dir item");
669         if (errors & I_ERR_ODD_FILE_EXTENT)
670                 fprintf(stderr, ", odd file extent");
671         if (errors & I_ERR_BAD_FILE_EXTENT)
672                 fprintf(stderr, ", bad file extent");
673         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
674                 fprintf(stderr, ", file extent overlap");
675         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
676                 fprintf(stderr, ", file extent discount");
677         if (errors & I_ERR_DIR_ISIZE_WRONG)
678                 fprintf(stderr, ", dir isize wrong");
679         if (errors & I_ERR_FILE_NBYTES_WRONG)
680                 fprintf(stderr, ", nbytes wrong");
681         if (errors & I_ERR_ODD_CSUM_ITEM)
682                 fprintf(stderr, ", odd csum item");
683         if (errors & I_ERR_SOME_CSUM_MISSING)
684                 fprintf(stderr, ", some csum missing");
685         if (errors & I_ERR_LINK_COUNT_WRONG)
686                 fprintf(stderr, ", link count wrong");
687         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
688                 fprintf(stderr, ", orphan file extent");
689         fprintf(stderr, "\n");
690         /* Print the orphan extents if needed */
691         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
692                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
693
694         /* Print the holes if needed */
695         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
696                 struct file_extent_hole *hole;
697                 struct rb_node *node;
698                 int found = 0;
699
700                 node = rb_first(&rec->holes);
701                 fprintf(stderr, "Found file extent holes:\n");
702                 while (node) {
703                         found = 1;
704                         hole = rb_entry(node, struct file_extent_hole, node);
705                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
706                                 hole->start, hole->len);
707                         node = rb_next(node);
708                 }
709                 if (!found)
710                         fprintf(stderr, "\tstart: 0, len: %llu\n",
711                                 round_up(rec->isize, root->sectorsize));
712         }
713 }
714
715 static void print_ref_error(int errors)
716 {
717         if (errors & REF_ERR_NO_DIR_ITEM)
718                 fprintf(stderr, ", no dir item");
719         if (errors & REF_ERR_NO_DIR_INDEX)
720                 fprintf(stderr, ", no dir index");
721         if (errors & REF_ERR_NO_INODE_REF)
722                 fprintf(stderr, ", no inode ref");
723         if (errors & REF_ERR_DUP_DIR_ITEM)
724                 fprintf(stderr, ", dup dir item");
725         if (errors & REF_ERR_DUP_DIR_INDEX)
726                 fprintf(stderr, ", dup dir index");
727         if (errors & REF_ERR_DUP_INODE_REF)
728                 fprintf(stderr, ", dup inode ref");
729         if (errors & REF_ERR_INDEX_UNMATCH)
730                 fprintf(stderr, ", index unmatch");
731         if (errors & REF_ERR_FILETYPE_UNMATCH)
732                 fprintf(stderr, ", filetype unmatch");
733         if (errors & REF_ERR_NAME_TOO_LONG)
734                 fprintf(stderr, ", name too long");
735         if (errors & REF_ERR_NO_ROOT_REF)
736                 fprintf(stderr, ", no root ref");
737         if (errors & REF_ERR_NO_ROOT_BACKREF)
738                 fprintf(stderr, ", no root backref");
739         if (errors & REF_ERR_DUP_ROOT_REF)
740                 fprintf(stderr, ", dup root ref");
741         if (errors & REF_ERR_DUP_ROOT_BACKREF)
742                 fprintf(stderr, ", dup root backref");
743         fprintf(stderr, "\n");
744 }
745
746 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
747                                           u64 ino, int mod)
748 {
749         struct ptr_node *node;
750         struct cache_extent *cache;
751         struct inode_record *rec = NULL;
752         int ret;
753
754         cache = lookup_cache_extent(inode_cache, ino, 1);
755         if (cache) {
756                 node = container_of(cache, struct ptr_node, cache);
757                 rec = node->data;
758                 if (mod && rec->refs > 1) {
759                         node->data = clone_inode_rec(rec);
760                         if (IS_ERR(node->data))
761                                 return node->data;
762                         rec->refs--;
763                         rec = node->data;
764                 }
765         } else if (mod) {
766                 rec = calloc(1, sizeof(*rec));
767                 if (!rec)
768                         return ERR_PTR(-ENOMEM);
769                 rec->ino = ino;
770                 rec->extent_start = (u64)-1;
771                 rec->refs = 1;
772                 INIT_LIST_HEAD(&rec->backrefs);
773                 INIT_LIST_HEAD(&rec->orphan_extents);
774                 rec->holes = RB_ROOT;
775
776                 node = malloc(sizeof(*node));
777                 if (!node) {
778                         free(rec);
779                         return ERR_PTR(-ENOMEM);
780                 }
781                 node->cache.start = ino;
782                 node->cache.size = 1;
783                 node->data = rec;
784
785                 if (ino == BTRFS_FREE_INO_OBJECTID)
786                         rec->found_link = 1;
787
788                 ret = insert_cache_extent(inode_cache, &node->cache);
789                 if (ret)
790                         return ERR_PTR(-EEXIST);
791         }
792         return rec;
793 }
794
795 static void free_orphan_data_extents(struct list_head *orphan_extents)
796 {
797         struct orphan_data_extent *orphan;
798
799         while (!list_empty(orphan_extents)) {
800                 orphan = list_entry(orphan_extents->next,
801                                     struct orphan_data_extent, list);
802                 list_del(&orphan->list);
803                 free(orphan);
804         }
805 }
806
807 static void free_inode_rec(struct inode_record *rec)
808 {
809         struct inode_backref *backref;
810
811         if (--rec->refs > 0)
812                 return;
813
814         while (!list_empty(&rec->backrefs)) {
815                 backref = list_entry(rec->backrefs.next,
816                                      struct inode_backref, list);
817                 list_del(&backref->list);
818                 free(backref);
819         }
820         free_orphan_data_extents(&rec->orphan_extents);
821         free_file_extent_holes(&rec->holes);
822         free(rec);
823 }
824
825 static int can_free_inode_rec(struct inode_record *rec)
826 {
827         if (!rec->errors && rec->checked && rec->found_inode_item &&
828             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
829                 return 1;
830         return 0;
831 }
832
833 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
834                                  struct inode_record *rec)
835 {
836         struct cache_extent *cache;
837         struct inode_backref *tmp, *backref;
838         struct ptr_node *node;
839         unsigned char filetype;
840
841         if (!rec->found_inode_item)
842                 return;
843
844         filetype = imode_to_type(rec->imode);
845         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
846                 if (backref->found_dir_item && backref->found_dir_index) {
847                         if (backref->filetype != filetype)
848                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
849                         if (!backref->errors && backref->found_inode_ref &&
850                             rec->nlink == rec->found_link) {
851                                 list_del(&backref->list);
852                                 free(backref);
853                         }
854                 }
855         }
856
857         if (!rec->checked || rec->merging)
858                 return;
859
860         if (S_ISDIR(rec->imode)) {
861                 if (rec->found_size != rec->isize)
862                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
863                 if (rec->found_file_extent)
864                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
865         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
866                 if (rec->found_dir_item)
867                         rec->errors |= I_ERR_ODD_DIR_ITEM;
868                 if (rec->found_size != rec->nbytes)
869                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
870                 if (rec->nlink > 0 && !no_holes &&
871                     (rec->extent_end < rec->isize ||
872                      first_extent_gap(&rec->holes) < rec->isize))
873                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
874         }
875
876         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
877                 if (rec->found_csum_item && rec->nodatasum)
878                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
879                 if (rec->some_csum_missing && !rec->nodatasum)
880                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
881         }
882
883         BUG_ON(rec->refs != 1);
884         if (can_free_inode_rec(rec)) {
885                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
886                 node = container_of(cache, struct ptr_node, cache);
887                 BUG_ON(node->data != rec);
888                 remove_cache_extent(inode_cache, &node->cache);
889                 free(node);
890                 free_inode_rec(rec);
891         }
892 }
893
894 static int check_orphan_item(struct btrfs_root *root, u64 ino)
895 {
896         struct btrfs_path path;
897         struct btrfs_key key;
898         int ret;
899
900         key.objectid = BTRFS_ORPHAN_OBJECTID;
901         key.type = BTRFS_ORPHAN_ITEM_KEY;
902         key.offset = ino;
903
904         btrfs_init_path(&path);
905         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
906         btrfs_release_path(&path);
907         if (ret > 0)
908                 ret = -ENOENT;
909         return ret;
910 }
911
912 static int process_inode_item(struct extent_buffer *eb,
913                               int slot, struct btrfs_key *key,
914                               struct shared_node *active_node)
915 {
916         struct inode_record *rec;
917         struct btrfs_inode_item *item;
918
919         rec = active_node->current;
920         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
921         if (rec->found_inode_item) {
922                 rec->errors |= I_ERR_DUP_INODE_ITEM;
923                 return 1;
924         }
925         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
926         rec->nlink = btrfs_inode_nlink(eb, item);
927         rec->isize = btrfs_inode_size(eb, item);
928         rec->nbytes = btrfs_inode_nbytes(eb, item);
929         rec->imode = btrfs_inode_mode(eb, item);
930         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
931                 rec->nodatasum = 1;
932         rec->found_inode_item = 1;
933         if (rec->nlink == 0)
934                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
935         maybe_free_inode_rec(&active_node->inode_cache, rec);
936         return 0;
937 }
938
939 static struct inode_backref *get_inode_backref(struct inode_record *rec,
940                                                 const char *name,
941                                                 int namelen, u64 dir)
942 {
943         struct inode_backref *backref;
944
945         list_for_each_entry(backref, &rec->backrefs, list) {
946                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
947                         break;
948                 if (backref->dir != dir || backref->namelen != namelen)
949                         continue;
950                 if (memcmp(name, backref->name, namelen))
951                         continue;
952                 return backref;
953         }
954
955         backref = malloc(sizeof(*backref) + namelen + 1);
956         if (!backref)
957                 return NULL;
958         memset(backref, 0, sizeof(*backref));
959         backref->dir = dir;
960         backref->namelen = namelen;
961         memcpy(backref->name, name, namelen);
962         backref->name[namelen] = '\0';
963         list_add_tail(&backref->list, &rec->backrefs);
964         return backref;
965 }
966
967 static int add_inode_backref(struct cache_tree *inode_cache,
968                              u64 ino, u64 dir, u64 index,
969                              const char *name, int namelen,
970                              int filetype, int itemtype, int errors)
971 {
972         struct inode_record *rec;
973         struct inode_backref *backref;
974
975         rec = get_inode_rec(inode_cache, ino, 1);
976         BUG_ON(IS_ERR(rec));
977         backref = get_inode_backref(rec, name, namelen, dir);
978         BUG_ON(!backref);
979         if (errors)
980                 backref->errors |= errors;
981         if (itemtype == BTRFS_DIR_INDEX_KEY) {
982                 if (backref->found_dir_index)
983                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
984                 if (backref->found_inode_ref && backref->index != index)
985                         backref->errors |= REF_ERR_INDEX_UNMATCH;
986                 if (backref->found_dir_item && backref->filetype != filetype)
987                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
988
989                 backref->index = index;
990                 backref->filetype = filetype;
991                 backref->found_dir_index = 1;
992         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
993                 rec->found_link++;
994                 if (backref->found_dir_item)
995                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
996                 if (backref->found_dir_index && backref->filetype != filetype)
997                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
998
999                 backref->filetype = filetype;
1000                 backref->found_dir_item = 1;
1001         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1002                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1003                 if (backref->found_inode_ref)
1004                         backref->errors |= REF_ERR_DUP_INODE_REF;
1005                 if (backref->found_dir_index && backref->index != index)
1006                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1007                 else
1008                         backref->index = index;
1009
1010                 backref->ref_type = itemtype;
1011                 backref->found_inode_ref = 1;
1012         } else {
1013                 BUG_ON(1);
1014         }
1015
1016         maybe_free_inode_rec(inode_cache, rec);
1017         return 0;
1018 }
1019
1020 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1021                             struct cache_tree *dst_cache)
1022 {
1023         struct inode_backref *backref;
1024         u32 dir_count = 0;
1025         int ret = 0;
1026
1027         dst->merging = 1;
1028         list_for_each_entry(backref, &src->backrefs, list) {
1029                 if (backref->found_dir_index) {
1030                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1031                                         backref->index, backref->name,
1032                                         backref->namelen, backref->filetype,
1033                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1034                 }
1035                 if (backref->found_dir_item) {
1036                         dir_count++;
1037                         add_inode_backref(dst_cache, dst->ino,
1038                                         backref->dir, 0, backref->name,
1039                                         backref->namelen, backref->filetype,
1040                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1041                 }
1042                 if (backref->found_inode_ref) {
1043                         add_inode_backref(dst_cache, dst->ino,
1044                                         backref->dir, backref->index,
1045                                         backref->name, backref->namelen, 0,
1046                                         backref->ref_type, backref->errors);
1047                 }
1048         }
1049
1050         if (src->found_dir_item)
1051                 dst->found_dir_item = 1;
1052         if (src->found_file_extent)
1053                 dst->found_file_extent = 1;
1054         if (src->found_csum_item)
1055                 dst->found_csum_item = 1;
1056         if (src->some_csum_missing)
1057                 dst->some_csum_missing = 1;
1058         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1059                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1060                 if (ret < 0)
1061                         return ret;
1062         }
1063
1064         BUG_ON(src->found_link < dir_count);
1065         dst->found_link += src->found_link - dir_count;
1066         dst->found_size += src->found_size;
1067         if (src->extent_start != (u64)-1) {
1068                 if (dst->extent_start == (u64)-1) {
1069                         dst->extent_start = src->extent_start;
1070                         dst->extent_end = src->extent_end;
1071                 } else {
1072                         if (dst->extent_end > src->extent_start)
1073                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1074                         else if (dst->extent_end < src->extent_start) {
1075                                 ret = add_file_extent_hole(&dst->holes,
1076                                         dst->extent_end,
1077                                         src->extent_start - dst->extent_end);
1078                         }
1079                         if (dst->extent_end < src->extent_end)
1080                                 dst->extent_end = src->extent_end;
1081                 }
1082         }
1083
1084         dst->errors |= src->errors;
1085         if (src->found_inode_item) {
1086                 if (!dst->found_inode_item) {
1087                         dst->nlink = src->nlink;
1088                         dst->isize = src->isize;
1089                         dst->nbytes = src->nbytes;
1090                         dst->imode = src->imode;
1091                         dst->nodatasum = src->nodatasum;
1092                         dst->found_inode_item = 1;
1093                 } else {
1094                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1095                 }
1096         }
1097         dst->merging = 0;
1098
1099         return 0;
1100 }
1101
1102 static int splice_shared_node(struct shared_node *src_node,
1103                               struct shared_node *dst_node)
1104 {
1105         struct cache_extent *cache;
1106         struct ptr_node *node, *ins;
1107         struct cache_tree *src, *dst;
1108         struct inode_record *rec, *conflict;
1109         u64 current_ino = 0;
1110         int splice = 0;
1111         int ret;
1112
1113         if (--src_node->refs == 0)
1114                 splice = 1;
1115         if (src_node->current)
1116                 current_ino = src_node->current->ino;
1117
1118         src = &src_node->root_cache;
1119         dst = &dst_node->root_cache;
1120 again:
1121         cache = search_cache_extent(src, 0);
1122         while (cache) {
1123                 node = container_of(cache, struct ptr_node, cache);
1124                 rec = node->data;
1125                 cache = next_cache_extent(cache);
1126
1127                 if (splice) {
1128                         remove_cache_extent(src, &node->cache);
1129                         ins = node;
1130                 } else {
1131                         ins = malloc(sizeof(*ins));
1132                         BUG_ON(!ins);
1133                         ins->cache.start = node->cache.start;
1134                         ins->cache.size = node->cache.size;
1135                         ins->data = rec;
1136                         rec->refs++;
1137                 }
1138                 ret = insert_cache_extent(dst, &ins->cache);
1139                 if (ret == -EEXIST) {
1140                         conflict = get_inode_rec(dst, rec->ino, 1);
1141                         BUG_ON(IS_ERR(conflict));
1142                         merge_inode_recs(rec, conflict, dst);
1143                         if (rec->checked) {
1144                                 conflict->checked = 1;
1145                                 if (dst_node->current == conflict)
1146                                         dst_node->current = NULL;
1147                         }
1148                         maybe_free_inode_rec(dst, conflict);
1149                         free_inode_rec(rec);
1150                         free(ins);
1151                 } else {
1152                         BUG_ON(ret);
1153                 }
1154         }
1155
1156         if (src == &src_node->root_cache) {
1157                 src = &src_node->inode_cache;
1158                 dst = &dst_node->inode_cache;
1159                 goto again;
1160         }
1161
1162         if (current_ino > 0 && (!dst_node->current ||
1163             current_ino > dst_node->current->ino)) {
1164                 if (dst_node->current) {
1165                         dst_node->current->checked = 1;
1166                         maybe_free_inode_rec(dst, dst_node->current);
1167                 }
1168                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1169                 BUG_ON(IS_ERR(dst_node->current));
1170         }
1171         return 0;
1172 }
1173
1174 static void free_inode_ptr(struct cache_extent *cache)
1175 {
1176         struct ptr_node *node;
1177         struct inode_record *rec;
1178
1179         node = container_of(cache, struct ptr_node, cache);
1180         rec = node->data;
1181         free_inode_rec(rec);
1182         free(node);
1183 }
1184
1185 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1186
1187 static struct shared_node *find_shared_node(struct cache_tree *shared,
1188                                             u64 bytenr)
1189 {
1190         struct cache_extent *cache;
1191         struct shared_node *node;
1192
1193         cache = lookup_cache_extent(shared, bytenr, 1);
1194         if (cache) {
1195                 node = container_of(cache, struct shared_node, cache);
1196                 return node;
1197         }
1198         return NULL;
1199 }
1200
1201 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1202 {
1203         int ret;
1204         struct shared_node *node;
1205
1206         node = calloc(1, sizeof(*node));
1207         if (!node)
1208                 return -ENOMEM;
1209         node->cache.start = bytenr;
1210         node->cache.size = 1;
1211         cache_tree_init(&node->root_cache);
1212         cache_tree_init(&node->inode_cache);
1213         node->refs = refs;
1214
1215         ret = insert_cache_extent(shared, &node->cache);
1216
1217         return ret;
1218 }
1219
1220 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1221                              struct walk_control *wc, int level)
1222 {
1223         struct shared_node *node;
1224         struct shared_node *dest;
1225         int ret;
1226
1227         if (level == wc->active_node)
1228                 return 0;
1229
1230         BUG_ON(wc->active_node <= level);
1231         node = find_shared_node(&wc->shared, bytenr);
1232         if (!node) {
1233                 ret = add_shared_node(&wc->shared, bytenr, refs);
1234                 BUG_ON(ret);
1235                 node = find_shared_node(&wc->shared, bytenr);
1236                 wc->nodes[level] = node;
1237                 wc->active_node = level;
1238                 return 0;
1239         }
1240
1241         if (wc->root_level == wc->active_node &&
1242             btrfs_root_refs(&root->root_item) == 0) {
1243                 if (--node->refs == 0) {
1244                         free_inode_recs_tree(&node->root_cache);
1245                         free_inode_recs_tree(&node->inode_cache);
1246                         remove_cache_extent(&wc->shared, &node->cache);
1247                         free(node);
1248                 }
1249                 return 1;
1250         }
1251
1252         dest = wc->nodes[wc->active_node];
1253         splice_shared_node(node, dest);
1254         if (node->refs == 0) {
1255                 remove_cache_extent(&wc->shared, &node->cache);
1256                 free(node);
1257         }
1258         return 1;
1259 }
1260
1261 static int leave_shared_node(struct btrfs_root *root,
1262                              struct walk_control *wc, int level)
1263 {
1264         struct shared_node *node;
1265         struct shared_node *dest;
1266         int i;
1267
1268         if (level == wc->root_level)
1269                 return 0;
1270
1271         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1272                 if (wc->nodes[i])
1273                         break;
1274         }
1275         BUG_ON(i >= BTRFS_MAX_LEVEL);
1276
1277         node = wc->nodes[wc->active_node];
1278         wc->nodes[wc->active_node] = NULL;
1279         wc->active_node = i;
1280
1281         dest = wc->nodes[wc->active_node];
1282         if (wc->active_node < wc->root_level ||
1283             btrfs_root_refs(&root->root_item) > 0) {
1284                 BUG_ON(node->refs <= 1);
1285                 splice_shared_node(node, dest);
1286         } else {
1287                 BUG_ON(node->refs < 2);
1288                 node->refs--;
1289         }
1290         return 0;
1291 }
1292
1293 /*
1294  * Returns:
1295  * < 0 - on error
1296  * 1   - if the root with id child_root_id is a child of root parent_root_id
1297  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1298  *       has other root(s) as parent(s)
1299  * 2   - if the root child_root_id doesn't have any parent roots
1300  */
1301 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1302                          u64 child_root_id)
1303 {
1304         struct btrfs_path path;
1305         struct btrfs_key key;
1306         struct extent_buffer *leaf;
1307         int has_parent = 0;
1308         int ret;
1309
1310         btrfs_init_path(&path);
1311
1312         key.objectid = parent_root_id;
1313         key.type = BTRFS_ROOT_REF_KEY;
1314         key.offset = child_root_id;
1315         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1316                                 0, 0);
1317         if (ret < 0)
1318                 return ret;
1319         btrfs_release_path(&path);
1320         if (!ret)
1321                 return 1;
1322
1323         key.objectid = child_root_id;
1324         key.type = BTRFS_ROOT_BACKREF_KEY;
1325         key.offset = 0;
1326         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1327                                 0, 0);
1328         if (ret < 0)
1329                 goto out;
1330
1331         while (1) {
1332                 leaf = path.nodes[0];
1333                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1334                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1335                         if (ret)
1336                                 break;
1337                         leaf = path.nodes[0];
1338                 }
1339
1340                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1341                 if (key.objectid != child_root_id ||
1342                     key.type != BTRFS_ROOT_BACKREF_KEY)
1343                         break;
1344
1345                 has_parent = 1;
1346
1347                 if (key.offset == parent_root_id) {
1348                         btrfs_release_path(&path);
1349                         return 1;
1350                 }
1351
1352                 path.slots[0]++;
1353         }
1354 out:
1355         btrfs_release_path(&path);
1356         if (ret < 0)
1357                 return ret;
1358         return has_parent ? 0 : 2;
1359 }
1360
1361 static int process_dir_item(struct btrfs_root *root,
1362                             struct extent_buffer *eb,
1363                             int slot, struct btrfs_key *key,
1364                             struct shared_node *active_node)
1365 {
1366         u32 total;
1367         u32 cur = 0;
1368         u32 len;
1369         u32 name_len;
1370         u32 data_len;
1371         int error;
1372         int nritems = 0;
1373         int filetype;
1374         struct btrfs_dir_item *di;
1375         struct inode_record *rec;
1376         struct cache_tree *root_cache;
1377         struct cache_tree *inode_cache;
1378         struct btrfs_key location;
1379         char namebuf[BTRFS_NAME_LEN];
1380
1381         root_cache = &active_node->root_cache;
1382         inode_cache = &active_node->inode_cache;
1383         rec = active_node->current;
1384         rec->found_dir_item = 1;
1385
1386         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1387         total = btrfs_item_size_nr(eb, slot);
1388         while (cur < total) {
1389                 nritems++;
1390                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1391                 name_len = btrfs_dir_name_len(eb, di);
1392                 data_len = btrfs_dir_data_len(eb, di);
1393                 filetype = btrfs_dir_type(eb, di);
1394
1395                 rec->found_size += name_len;
1396                 if (name_len <= BTRFS_NAME_LEN) {
1397                         len = name_len;
1398                         error = 0;
1399                 } else {
1400                         len = BTRFS_NAME_LEN;
1401                         error = REF_ERR_NAME_TOO_LONG;
1402                 }
1403                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1404
1405                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1406                         add_inode_backref(inode_cache, location.objectid,
1407                                           key->objectid, key->offset, namebuf,
1408                                           len, filetype, key->type, error);
1409                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1410                         add_inode_backref(root_cache, location.objectid,
1411                                           key->objectid, key->offset,
1412                                           namebuf, len, filetype,
1413                                           key->type, error);
1414                 } else {
1415                         fprintf(stderr, "invalid location in dir item %u\n",
1416                                 location.type);
1417                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1418                                           key->objectid, key->offset, namebuf,
1419                                           len, filetype, key->type, error);
1420                 }
1421
1422                 len = sizeof(*di) + name_len + data_len;
1423                 di = (struct btrfs_dir_item *)((char *)di + len);
1424                 cur += len;
1425         }
1426         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1427                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1428
1429         return 0;
1430 }
1431
1432 static int process_inode_ref(struct extent_buffer *eb,
1433                              int slot, struct btrfs_key *key,
1434                              struct shared_node *active_node)
1435 {
1436         u32 total;
1437         u32 cur = 0;
1438         u32 len;
1439         u32 name_len;
1440         u64 index;
1441         int error;
1442         struct cache_tree *inode_cache;
1443         struct btrfs_inode_ref *ref;
1444         char namebuf[BTRFS_NAME_LEN];
1445
1446         inode_cache = &active_node->inode_cache;
1447
1448         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1449         total = btrfs_item_size_nr(eb, slot);
1450         while (cur < total) {
1451                 name_len = btrfs_inode_ref_name_len(eb, ref);
1452                 index = btrfs_inode_ref_index(eb, ref);
1453                 if (name_len <= BTRFS_NAME_LEN) {
1454                         len = name_len;
1455                         error = 0;
1456                 } else {
1457                         len = BTRFS_NAME_LEN;
1458                         error = REF_ERR_NAME_TOO_LONG;
1459                 }
1460                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1461                 add_inode_backref(inode_cache, key->objectid, key->offset,
1462                                   index, namebuf, len, 0, key->type, error);
1463
1464                 len = sizeof(*ref) + name_len;
1465                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1466                 cur += len;
1467         }
1468         return 0;
1469 }
1470
1471 static int process_inode_extref(struct extent_buffer *eb,
1472                                 int slot, struct btrfs_key *key,
1473                                 struct shared_node *active_node)
1474 {
1475         u32 total;
1476         u32 cur = 0;
1477         u32 len;
1478         u32 name_len;
1479         u64 index;
1480         u64 parent;
1481         int error;
1482         struct cache_tree *inode_cache;
1483         struct btrfs_inode_extref *extref;
1484         char namebuf[BTRFS_NAME_LEN];
1485
1486         inode_cache = &active_node->inode_cache;
1487
1488         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1489         total = btrfs_item_size_nr(eb, slot);
1490         while (cur < total) {
1491                 name_len = btrfs_inode_extref_name_len(eb, extref);
1492                 index = btrfs_inode_extref_index(eb, extref);
1493                 parent = btrfs_inode_extref_parent(eb, extref);
1494                 if (name_len <= BTRFS_NAME_LEN) {
1495                         len = name_len;
1496                         error = 0;
1497                 } else {
1498                         len = BTRFS_NAME_LEN;
1499                         error = REF_ERR_NAME_TOO_LONG;
1500                 }
1501                 read_extent_buffer(eb, namebuf,
1502                                    (unsigned long)(extref + 1), len);
1503                 add_inode_backref(inode_cache, key->objectid, parent,
1504                                   index, namebuf, len, 0, key->type, error);
1505
1506                 len = sizeof(*extref) + name_len;
1507                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1508                 cur += len;
1509         }
1510         return 0;
1511
1512 }
1513
1514 static int count_csum_range(struct btrfs_root *root, u64 start,
1515                             u64 len, u64 *found)
1516 {
1517         struct btrfs_key key;
1518         struct btrfs_path path;
1519         struct extent_buffer *leaf;
1520         int ret;
1521         size_t size;
1522         *found = 0;
1523         u64 csum_end;
1524         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1525
1526         btrfs_init_path(&path);
1527
1528         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1529         key.offset = start;
1530         key.type = BTRFS_EXTENT_CSUM_KEY;
1531
1532         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1533                                 &key, &path, 0, 0);
1534         if (ret < 0)
1535                 goto out;
1536         if (ret > 0 && path.slots[0] > 0) {
1537                 leaf = path.nodes[0];
1538                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1539                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1540                     key.type == BTRFS_EXTENT_CSUM_KEY)
1541                         path.slots[0]--;
1542         }
1543
1544         while (len > 0) {
1545                 leaf = path.nodes[0];
1546                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1547                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1548                         if (ret > 0)
1549                                 break;
1550                         else if (ret < 0)
1551                                 goto out;
1552                         leaf = path.nodes[0];
1553                 }
1554
1555                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1556                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1557                     key.type != BTRFS_EXTENT_CSUM_KEY)
1558                         break;
1559
1560                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1561                 if (key.offset >= start + len)
1562                         break;
1563
1564                 if (key.offset > start)
1565                         start = key.offset;
1566
1567                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1568                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1569                 if (csum_end > start) {
1570                         size = min(csum_end - start, len);
1571                         len -= size;
1572                         start += size;
1573                         *found += size;
1574                 }
1575
1576                 path.slots[0]++;
1577         }
1578 out:
1579         btrfs_release_path(&path);
1580         if (ret < 0)
1581                 return ret;
1582         return 0;
1583 }
1584
1585 static int process_file_extent(struct btrfs_root *root,
1586                                 struct extent_buffer *eb,
1587                                 int slot, struct btrfs_key *key,
1588                                 struct shared_node *active_node)
1589 {
1590         struct inode_record *rec;
1591         struct btrfs_file_extent_item *fi;
1592         u64 num_bytes = 0;
1593         u64 disk_bytenr = 0;
1594         u64 extent_offset = 0;
1595         u64 mask = root->sectorsize - 1;
1596         int extent_type;
1597         int ret;
1598
1599         rec = active_node->current;
1600         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1601         rec->found_file_extent = 1;
1602
1603         if (rec->extent_start == (u64)-1) {
1604                 rec->extent_start = key->offset;
1605                 rec->extent_end = key->offset;
1606         }
1607
1608         if (rec->extent_end > key->offset)
1609                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1610         else if (rec->extent_end < key->offset) {
1611                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1612                                            key->offset - rec->extent_end);
1613                 if (ret < 0)
1614                         return ret;
1615         }
1616
1617         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1618         extent_type = btrfs_file_extent_type(eb, fi);
1619
1620         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1621                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1622                 if (num_bytes == 0)
1623                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1624                 rec->found_size += num_bytes;
1625                 num_bytes = (num_bytes + mask) & ~mask;
1626         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1627                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1628                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1629                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1630                 extent_offset = btrfs_file_extent_offset(eb, fi);
1631                 if (num_bytes == 0 || (num_bytes & mask))
1632                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1633                 if (num_bytes + extent_offset >
1634                     btrfs_file_extent_ram_bytes(eb, fi))
1635                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1636                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1637                     (btrfs_file_extent_compression(eb, fi) ||
1638                      btrfs_file_extent_encryption(eb, fi) ||
1639                      btrfs_file_extent_other_encoding(eb, fi)))
1640                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1641                 if (disk_bytenr > 0)
1642                         rec->found_size += num_bytes;
1643         } else {
1644                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1645         }
1646         rec->extent_end = key->offset + num_bytes;
1647
1648         /*
1649          * The data reloc tree will copy full extents into its inode and then
1650          * copy the corresponding csums.  Because the extent it copied could be
1651          * a preallocated extent that hasn't been written to yet there may be no
1652          * csums to copy, ergo we won't have csums for our file extent.  This is
1653          * ok so just don't bother checking csums if the inode belongs to the
1654          * data reloc tree.
1655          */
1656         if (disk_bytenr > 0 &&
1657             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1658                 u64 found;
1659                 if (btrfs_file_extent_compression(eb, fi))
1660                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1661                 else
1662                         disk_bytenr += extent_offset;
1663
1664                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1665                 if (ret < 0)
1666                         return ret;
1667                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1668                         if (found > 0)
1669                                 rec->found_csum_item = 1;
1670                         if (found < num_bytes)
1671                                 rec->some_csum_missing = 1;
1672                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1673                         if (found > 0)
1674                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1675                 }
1676         }
1677         return 0;
1678 }
1679
1680 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1681                             struct walk_control *wc)
1682 {
1683         struct btrfs_key key;
1684         u32 nritems;
1685         int i;
1686         int ret = 0;
1687         struct cache_tree *inode_cache;
1688         struct shared_node *active_node;
1689
1690         if (wc->root_level == wc->active_node &&
1691             btrfs_root_refs(&root->root_item) == 0)
1692                 return 0;
1693
1694         active_node = wc->nodes[wc->active_node];
1695         inode_cache = &active_node->inode_cache;
1696         nritems = btrfs_header_nritems(eb);
1697         for (i = 0; i < nritems; i++) {
1698                 btrfs_item_key_to_cpu(eb, &key, i);
1699
1700                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1701                         continue;
1702                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1703                         continue;
1704
1705                 if (active_node->current == NULL ||
1706                     active_node->current->ino < key.objectid) {
1707                         if (active_node->current) {
1708                                 active_node->current->checked = 1;
1709                                 maybe_free_inode_rec(inode_cache,
1710                                                      active_node->current);
1711                         }
1712                         active_node->current = get_inode_rec(inode_cache,
1713                                                              key.objectid, 1);
1714                         BUG_ON(IS_ERR(active_node->current));
1715                 }
1716                 switch (key.type) {
1717                 case BTRFS_DIR_ITEM_KEY:
1718                 case BTRFS_DIR_INDEX_KEY:
1719                         ret = process_dir_item(root, eb, i, &key, active_node);
1720                         break;
1721                 case BTRFS_INODE_REF_KEY:
1722                         ret = process_inode_ref(eb, i, &key, active_node);
1723                         break;
1724                 case BTRFS_INODE_EXTREF_KEY:
1725                         ret = process_inode_extref(eb, i, &key, active_node);
1726                         break;
1727                 case BTRFS_INODE_ITEM_KEY:
1728                         ret = process_inode_item(eb, i, &key, active_node);
1729                         break;
1730                 case BTRFS_EXTENT_DATA_KEY:
1731                         ret = process_file_extent(root, eb, i, &key,
1732                                                   active_node);
1733                         break;
1734                 default:
1735                         break;
1736                 };
1737         }
1738         return ret;
1739 }
1740
1741 static void reada_walk_down(struct btrfs_root *root,
1742                             struct extent_buffer *node, int slot)
1743 {
1744         u64 bytenr;
1745         u64 ptr_gen;
1746         u32 nritems;
1747         u32 blocksize;
1748         int i;
1749         int level;
1750
1751         level = btrfs_header_level(node);
1752         if (level != 1)
1753                 return;
1754
1755         nritems = btrfs_header_nritems(node);
1756         blocksize = btrfs_level_size(root, level - 1);
1757         for (i = slot; i < nritems; i++) {
1758                 bytenr = btrfs_node_blockptr(node, i);
1759                 ptr_gen = btrfs_node_ptr_generation(node, i);
1760                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1761         }
1762 }
1763
1764 /*
1765  * Check the child node/leaf by the following condition:
1766  * 1. the first item key of the node/leaf should be the same with the one
1767  *    in parent.
1768  * 2. block in parent node should match the child node/leaf.
1769  * 3. generation of parent node and child's header should be consistent.
1770  *
1771  * Or the child node/leaf pointed by the key in parent is not valid.
1772  *
1773  * We hope to check leaf owner too, but since subvol may share leaves,
1774  * which makes leaf owner check not so strong, key check should be
1775  * sufficient enough for that case.
1776  */
1777 static int check_child_node(struct btrfs_root *root,
1778                             struct extent_buffer *parent, int slot,
1779                             struct extent_buffer *child)
1780 {
1781         struct btrfs_key parent_key;
1782         struct btrfs_key child_key;
1783         int ret = 0;
1784
1785         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1786         if (btrfs_header_level(child) == 0)
1787                 btrfs_item_key_to_cpu(child, &child_key, 0);
1788         else
1789                 btrfs_node_key_to_cpu(child, &child_key, 0);
1790
1791         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1792                 ret = -EINVAL;
1793                 fprintf(stderr,
1794                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1795                         parent_key.objectid, parent_key.type, parent_key.offset,
1796                         child_key.objectid, child_key.type, child_key.offset);
1797         }
1798         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1799                 ret = -EINVAL;
1800                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1801                         btrfs_node_blockptr(parent, slot),
1802                         btrfs_header_bytenr(child));
1803         }
1804         if (btrfs_node_ptr_generation(parent, slot) !=
1805             btrfs_header_generation(child)) {
1806                 ret = -EINVAL;
1807                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1808                         btrfs_header_generation(child),
1809                         btrfs_node_ptr_generation(parent, slot));
1810         }
1811         return ret;
1812 }
1813
1814 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1815                           struct walk_control *wc, int *level)
1816 {
1817         enum btrfs_tree_block_status status;
1818         u64 bytenr;
1819         u64 ptr_gen;
1820         struct extent_buffer *next;
1821         struct extent_buffer *cur;
1822         u32 blocksize;
1823         int ret, err = 0;
1824         u64 refs;
1825
1826         WARN_ON(*level < 0);
1827         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1828         ret = btrfs_lookup_extent_info(NULL, root,
1829                                        path->nodes[*level]->start,
1830                                        *level, 1, &refs, NULL);
1831         if (ret < 0) {
1832                 err = ret;
1833                 goto out;
1834         }
1835
1836         if (refs > 1) {
1837                 ret = enter_shared_node(root, path->nodes[*level]->start,
1838                                         refs, wc, *level);
1839                 if (ret > 0) {
1840                         err = ret;
1841                         goto out;
1842                 }
1843         }
1844
1845         while (*level >= 0) {
1846                 WARN_ON(*level < 0);
1847                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1848                 cur = path->nodes[*level];
1849
1850                 if (btrfs_header_level(cur) != *level)
1851                         WARN_ON(1);
1852
1853                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1854                         break;
1855                 if (*level == 0) {
1856                         ret = process_one_leaf(root, cur, wc);
1857                         if (ret < 0)
1858                                 err = ret;
1859                         break;
1860                 }
1861                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1862                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1863                 blocksize = btrfs_level_size(root, *level - 1);
1864                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1865                                                1, &refs, NULL);
1866                 if (ret < 0)
1867                         refs = 0;
1868
1869                 if (refs > 1) {
1870                         ret = enter_shared_node(root, bytenr, refs,
1871                                                 wc, *level - 1);
1872                         if (ret > 0) {
1873                                 path->slots[*level]++;
1874                                 continue;
1875                         }
1876                 }
1877
1878                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1879                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1880                         free_extent_buffer(next);
1881                         reada_walk_down(root, cur, path->slots[*level]);
1882                         next = read_tree_block(root, bytenr, blocksize,
1883                                                ptr_gen);
1884                         if (!extent_buffer_uptodate(next)) {
1885                                 struct btrfs_key node_key;
1886
1887                                 btrfs_node_key_to_cpu(path->nodes[*level],
1888                                                       &node_key,
1889                                                       path->slots[*level]);
1890                                 btrfs_add_corrupt_extent_record(root->fs_info,
1891                                                 &node_key,
1892                                                 path->nodes[*level]->start,
1893                                                 root->leafsize, *level);
1894                                 err = -EIO;
1895                                 goto out;
1896                         }
1897                 }
1898
1899                 ret = check_child_node(root, cur, path->slots[*level], next);
1900                 if (ret) {
1901                         err = ret;
1902                         goto out;
1903                 }
1904
1905                 if (btrfs_is_leaf(next))
1906                         status = btrfs_check_leaf(root, NULL, next);
1907                 else
1908                         status = btrfs_check_node(root, NULL, next);
1909                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1910                         free_extent_buffer(next);
1911                         err = -EIO;
1912                         goto out;
1913                 }
1914
1915                 *level = *level - 1;
1916                 free_extent_buffer(path->nodes[*level]);
1917                 path->nodes[*level] = next;
1918                 path->slots[*level] = 0;
1919         }
1920 out:
1921         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1922         return err;
1923 }
1924
1925 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1926                         struct walk_control *wc, int *level)
1927 {
1928         int i;
1929         struct extent_buffer *leaf;
1930
1931         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1932                 leaf = path->nodes[i];
1933                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1934                         path->slots[i]++;
1935                         *level = i;
1936                         return 0;
1937                 } else {
1938                         free_extent_buffer(path->nodes[*level]);
1939                         path->nodes[*level] = NULL;
1940                         BUG_ON(*level > wc->active_node);
1941                         if (*level == wc->active_node)
1942                                 leave_shared_node(root, wc, *level);
1943                         *level = i + 1;
1944                 }
1945         }
1946         return 1;
1947 }
1948
1949 static int check_root_dir(struct inode_record *rec)
1950 {
1951         struct inode_backref *backref;
1952         int ret = -1;
1953
1954         if (!rec->found_inode_item || rec->errors)
1955                 goto out;
1956         if (rec->nlink != 1 || rec->found_link != 0)
1957                 goto out;
1958         if (list_empty(&rec->backrefs))
1959                 goto out;
1960         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1961         if (!backref->found_inode_ref)
1962                 goto out;
1963         if (backref->index != 0 || backref->namelen != 2 ||
1964             memcmp(backref->name, "..", 2))
1965                 goto out;
1966         if (backref->found_dir_index || backref->found_dir_item)
1967                 goto out;
1968         ret = 0;
1969 out:
1970         return ret;
1971 }
1972
1973 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1974                               struct btrfs_root *root, struct btrfs_path *path,
1975                               struct inode_record *rec)
1976 {
1977         struct btrfs_inode_item *ei;
1978         struct btrfs_key key;
1979         int ret;
1980
1981         key.objectid = rec->ino;
1982         key.type = BTRFS_INODE_ITEM_KEY;
1983         key.offset = (u64)-1;
1984
1985         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1986         if (ret < 0)
1987                 goto out;
1988         if (ret) {
1989                 if (!path->slots[0]) {
1990                         ret = -ENOENT;
1991                         goto out;
1992                 }
1993                 path->slots[0]--;
1994                 ret = 0;
1995         }
1996         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1997         if (key.objectid != rec->ino) {
1998                 ret = -ENOENT;
1999                 goto out;
2000         }
2001
2002         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2003                             struct btrfs_inode_item);
2004         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2005         btrfs_mark_buffer_dirty(path->nodes[0]);
2006         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2007         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2008                root->root_key.objectid);
2009 out:
2010         btrfs_release_path(path);
2011         return ret;
2012 }
2013
2014 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2015                                     struct btrfs_root *root,
2016                                     struct btrfs_path *path,
2017                                     struct inode_record *rec)
2018 {
2019         int ret;
2020
2021         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2022         btrfs_release_path(path);
2023         if (!ret)
2024                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2025         return ret;
2026 }
2027
2028 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2029                                struct btrfs_root *root,
2030                                struct btrfs_path *path,
2031                                struct inode_record *rec)
2032 {
2033         struct btrfs_inode_item *ei;
2034         struct btrfs_key key;
2035         int ret = 0;
2036
2037         key.objectid = rec->ino;
2038         key.type = BTRFS_INODE_ITEM_KEY;
2039         key.offset = 0;
2040
2041         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2042         if (ret) {
2043                 if (ret > 0)
2044                         ret = -ENOENT;
2045                 goto out;
2046         }
2047
2048         /* Since ret == 0, no need to check anything */
2049         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2050                             struct btrfs_inode_item);
2051         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2052         btrfs_mark_buffer_dirty(path->nodes[0]);
2053         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2054         printf("reset nbytes for ino %llu root %llu\n",
2055                rec->ino, root->root_key.objectid);
2056 out:
2057         btrfs_release_path(path);
2058         return ret;
2059 }
2060
2061 static int add_missing_dir_index(struct btrfs_root *root,
2062                                  struct cache_tree *inode_cache,
2063                                  struct inode_record *rec,
2064                                  struct inode_backref *backref)
2065 {
2066         struct btrfs_path *path;
2067         struct btrfs_trans_handle *trans;
2068         struct btrfs_dir_item *dir_item;
2069         struct extent_buffer *leaf;
2070         struct btrfs_key key;
2071         struct btrfs_disk_key disk_key;
2072         struct inode_record *dir_rec;
2073         unsigned long name_ptr;
2074         u32 data_size = sizeof(*dir_item) + backref->namelen;
2075         int ret;
2076
2077         path = btrfs_alloc_path();
2078         if (!path)
2079                 return -ENOMEM;
2080
2081         trans = btrfs_start_transaction(root, 1);
2082         if (IS_ERR(trans)) {
2083                 btrfs_free_path(path);
2084                 return PTR_ERR(trans);
2085         }
2086
2087         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2088                 (unsigned long long)rec->ino);
2089         key.objectid = backref->dir;
2090         key.type = BTRFS_DIR_INDEX_KEY;
2091         key.offset = backref->index;
2092
2093         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2094         BUG_ON(ret);
2095
2096         leaf = path->nodes[0];
2097         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2098
2099         disk_key.objectid = cpu_to_le64(rec->ino);
2100         disk_key.type = BTRFS_INODE_ITEM_KEY;
2101         disk_key.offset = 0;
2102
2103         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2104         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2105         btrfs_set_dir_data_len(leaf, dir_item, 0);
2106         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2107         name_ptr = (unsigned long)(dir_item + 1);
2108         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2109         btrfs_mark_buffer_dirty(leaf);
2110         btrfs_free_path(path);
2111         btrfs_commit_transaction(trans, root);
2112
2113         backref->found_dir_index = 1;
2114         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2115         BUG_ON(IS_ERR(dir_rec));
2116         if (!dir_rec)
2117                 return 0;
2118         dir_rec->found_size += backref->namelen;
2119         if (dir_rec->found_size == dir_rec->isize &&
2120             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2121                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2122         if (dir_rec->found_size != dir_rec->isize)
2123                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2124
2125         return 0;
2126 }
2127
2128 static int delete_dir_index(struct btrfs_root *root,
2129                             struct cache_tree *inode_cache,
2130                             struct inode_record *rec,
2131                             struct inode_backref *backref)
2132 {
2133         struct btrfs_trans_handle *trans;
2134         struct btrfs_dir_item *di;
2135         struct btrfs_path *path;
2136         int ret = 0;
2137
2138         path = btrfs_alloc_path();
2139         if (!path)
2140                 return -ENOMEM;
2141
2142         trans = btrfs_start_transaction(root, 1);
2143         if (IS_ERR(trans)) {
2144                 btrfs_free_path(path);
2145                 return PTR_ERR(trans);
2146         }
2147
2148
2149         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2150                 (unsigned long long)backref->dir,
2151                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2152                 (unsigned long long)root->objectid);
2153
2154         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2155                                     backref->name, backref->namelen,
2156                                     backref->index, -1);
2157         if (IS_ERR(di)) {
2158                 ret = PTR_ERR(di);
2159                 btrfs_free_path(path);
2160                 btrfs_commit_transaction(trans, root);
2161                 if (ret == -ENOENT)
2162                         return 0;
2163                 return ret;
2164         }
2165
2166         if (!di)
2167                 ret = btrfs_del_item(trans, root, path);
2168         else
2169                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2170         BUG_ON(ret);
2171         btrfs_free_path(path);
2172         btrfs_commit_transaction(trans, root);
2173         return ret;
2174 }
2175
2176 static int create_inode_item(struct btrfs_root *root,
2177                              struct inode_record *rec,
2178                              struct inode_backref *backref, int root_dir)
2179 {
2180         struct btrfs_trans_handle *trans;
2181         struct btrfs_inode_item inode_item;
2182         time_t now = time(NULL);
2183         int ret;
2184
2185         trans = btrfs_start_transaction(root, 1);
2186         if (IS_ERR(trans)) {
2187                 ret = PTR_ERR(trans);
2188                 return ret;
2189         }
2190
2191         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2192                 "be incomplete, please check permissions and content after "
2193                 "the fsck completes.\n", (unsigned long long)root->objectid,
2194                 (unsigned long long)rec->ino);
2195
2196         memset(&inode_item, 0, sizeof(inode_item));
2197         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2198         if (root_dir)
2199                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2200         else
2201                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2202         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2203         if (rec->found_dir_item) {
2204                 if (rec->found_file_extent)
2205                         fprintf(stderr, "root %llu inode %llu has both a dir "
2206                                 "item and extents, unsure if it is a dir or a "
2207                                 "regular file so setting it as a directory\n",
2208                                 (unsigned long long)root->objectid,
2209                                 (unsigned long long)rec->ino);
2210                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2211                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2212         } else if (!rec->found_dir_item) {
2213                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2214                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2215         }
2216         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2217         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2218         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2219         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2220         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2221         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2222         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2223         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2224
2225         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2226         BUG_ON(ret);
2227         btrfs_commit_transaction(trans, root);
2228         return 0;
2229 }
2230
2231 static int repair_inode_backrefs(struct btrfs_root *root,
2232                                  struct inode_record *rec,
2233                                  struct cache_tree *inode_cache,
2234                                  int delete)
2235 {
2236         struct inode_backref *tmp, *backref;
2237         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2238         int ret = 0;
2239         int repaired = 0;
2240
2241         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2242                 if (!delete && rec->ino == root_dirid) {
2243                         if (!rec->found_inode_item) {
2244                                 ret = create_inode_item(root, rec, backref, 1);
2245                                 if (ret)
2246                                         break;
2247                                 repaired++;
2248                         }
2249                 }
2250
2251                 /* Index 0 for root dir's are special, don't mess with it */
2252                 if (rec->ino == root_dirid && backref->index == 0)
2253                         continue;
2254
2255                 if (delete &&
2256                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2257                      (backref->found_dir_index && backref->found_inode_ref &&
2258                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2259                         ret = delete_dir_index(root, inode_cache, rec, backref);
2260                         if (ret)
2261                                 break;
2262                         repaired++;
2263                         list_del(&backref->list);
2264                         free(backref);
2265                 }
2266
2267                 if (!delete && !backref->found_dir_index &&
2268                     backref->found_dir_item && backref->found_inode_ref) {
2269                         ret = add_missing_dir_index(root, inode_cache, rec,
2270                                                     backref);
2271                         if (ret)
2272                                 break;
2273                         repaired++;
2274                         if (backref->found_dir_item &&
2275                             backref->found_dir_index &&
2276                             backref->found_dir_index) {
2277                                 if (!backref->errors &&
2278                                     backref->found_inode_ref) {
2279                                         list_del(&backref->list);
2280                                         free(backref);
2281                                 }
2282                         }
2283                 }
2284
2285                 if (!delete && (!backref->found_dir_index &&
2286                                 !backref->found_dir_item &&
2287                                 backref->found_inode_ref)) {
2288                         struct btrfs_trans_handle *trans;
2289                         struct btrfs_key location;
2290
2291                         ret = check_dir_conflict(root, backref->name,
2292                                                  backref->namelen,
2293                                                  backref->dir,
2294                                                  backref->index);
2295                         if (ret) {
2296                                 /*
2297                                  * let nlink fixing routine to handle it,
2298                                  * which can do it better.
2299                                  */
2300                                 ret = 0;
2301                                 break;
2302                         }
2303                         location.objectid = rec->ino;
2304                         location.type = BTRFS_INODE_ITEM_KEY;
2305                         location.offset = 0;
2306
2307                         trans = btrfs_start_transaction(root, 1);
2308                         if (IS_ERR(trans)) {
2309                                 ret = PTR_ERR(trans);
2310                                 break;
2311                         }
2312                         fprintf(stderr, "adding missing dir index/item pair "
2313                                 "for inode %llu\n",
2314                                 (unsigned long long)rec->ino);
2315                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2316                                                     backref->namelen,
2317                                                     backref->dir, &location,
2318                                                     imode_to_type(rec->imode),
2319                                                     backref->index);
2320                         BUG_ON(ret);
2321                         btrfs_commit_transaction(trans, root);
2322                         repaired++;
2323                 }
2324
2325                 if (!delete && (backref->found_inode_ref &&
2326                                 backref->found_dir_index &&
2327                                 backref->found_dir_item &&
2328                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2329                                 !rec->found_inode_item)) {
2330                         ret = create_inode_item(root, rec, backref, 0);
2331                         if (ret)
2332                                 break;
2333                         repaired++;
2334                 }
2335
2336         }
2337         return ret ? ret : repaired;
2338 }
2339
2340 /*
2341  * To determine the file type for nlink/inode_item repair
2342  *
2343  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2344  * Return -ENOENT if file type is not found.
2345  */
2346 static int find_file_type(struct inode_record *rec, u8 *type)
2347 {
2348         struct inode_backref *backref;
2349
2350         /* For inode item recovered case */
2351         if (rec->found_inode_item) {
2352                 *type = imode_to_type(rec->imode);
2353                 return 0;
2354         }
2355
2356         list_for_each_entry(backref, &rec->backrefs, list) {
2357                 if (backref->found_dir_index || backref->found_dir_item) {
2358                         *type = backref->filetype;
2359                         return 0;
2360                 }
2361         }
2362         return -ENOENT;
2363 }
2364
2365 /*
2366  * To determine the file name for nlink repair
2367  *
2368  * Return 0 if file name is found, set name and namelen.
2369  * Return -ENOENT if file name is not found.
2370  */
2371 static int find_file_name(struct inode_record *rec,
2372                           char *name, int *namelen)
2373 {
2374         struct inode_backref *backref;
2375
2376         list_for_each_entry(backref, &rec->backrefs, list) {
2377                 if (backref->found_dir_index || backref->found_dir_item ||
2378                     backref->found_inode_ref) {
2379                         memcpy(name, backref->name, backref->namelen);
2380                         *namelen = backref->namelen;
2381                         return 0;
2382                 }
2383         }
2384         return -ENOENT;
2385 }
2386
2387 /* Reset the nlink of the inode to the correct one */
2388 static int reset_nlink(struct btrfs_trans_handle *trans,
2389                        struct btrfs_root *root,
2390                        struct btrfs_path *path,
2391                        struct inode_record *rec)
2392 {
2393         struct inode_backref *backref;
2394         struct inode_backref *tmp;
2395         struct btrfs_key key;
2396         struct btrfs_inode_item *inode_item;
2397         int ret = 0;
2398
2399         /* We don't believe this either, reset it and iterate backref */
2400         rec->found_link = 0;
2401
2402         /* Remove all backref including the valid ones */
2403         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2404                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2405                                    backref->index, backref->name,
2406                                    backref->namelen, 0);
2407                 if (ret < 0)
2408                         goto out;
2409
2410                 /* remove invalid backref, so it won't be added back */
2411                 if (!(backref->found_dir_index &&
2412                       backref->found_dir_item &&
2413                       backref->found_inode_ref)) {
2414                         list_del(&backref->list);
2415                         free(backref);
2416                 } else {
2417                         rec->found_link++;
2418                 }
2419         }
2420
2421         /* Set nlink to 0 */
2422         key.objectid = rec->ino;
2423         key.type = BTRFS_INODE_ITEM_KEY;
2424         key.offset = 0;
2425         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2426         if (ret < 0)
2427                 goto out;
2428         if (ret > 0) {
2429                 ret = -ENOENT;
2430                 goto out;
2431         }
2432         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2433                                     struct btrfs_inode_item);
2434         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2435         btrfs_mark_buffer_dirty(path->nodes[0]);
2436         btrfs_release_path(path);
2437
2438         /*
2439          * Add back valid inode_ref/dir_item/dir_index,
2440          * add_link() will handle the nlink inc, so new nlink must be correct
2441          */
2442         list_for_each_entry(backref, &rec->backrefs, list) {
2443                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2444                                      backref->name, backref->namelen,
2445                                      backref->filetype, &backref->index, 1);
2446                 if (ret < 0)
2447                         goto out;
2448         }
2449 out:
2450         btrfs_release_path(path);
2451         return ret;
2452 }
2453
2454 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2455                                struct btrfs_root *root,
2456                                struct btrfs_path *path,
2457                                struct inode_record *rec)
2458 {
2459         char *dir_name = "lost+found";
2460         char namebuf[BTRFS_NAME_LEN] = {0};
2461         u64 lost_found_ino;
2462         u32 mode = 0700;
2463         u8 type = 0;
2464         int namelen = 0;
2465         int name_recovered = 0;
2466         int type_recovered = 0;
2467         int ret = 0;
2468
2469         /*
2470          * Get file name and type first before these invalid inode ref
2471          * are deleted by remove_all_invalid_backref()
2472          */
2473         name_recovered = !find_file_name(rec, namebuf, &namelen);
2474         type_recovered = !find_file_type(rec, &type);
2475
2476         if (!name_recovered) {
2477                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2478                        rec->ino, rec->ino);
2479                 namelen = count_digits(rec->ino);
2480                 sprintf(namebuf, "%llu", rec->ino);
2481                 name_recovered = 1;
2482         }
2483         if (!type_recovered) {
2484                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2485                        rec->ino);
2486                 type = BTRFS_FT_REG_FILE;
2487                 type_recovered = 1;
2488         }
2489
2490         ret = reset_nlink(trans, root, path, rec);
2491         if (ret < 0) {
2492                 fprintf(stderr,
2493                         "Failed to reset nlink for inode %llu: %s\n",
2494                         rec->ino, strerror(-ret));
2495                 goto out;
2496         }
2497
2498         if (rec->found_link == 0) {
2499                 lost_found_ino = root->highest_inode;
2500                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2501                         ret = -EOVERFLOW;
2502                         goto out;
2503                 }
2504                 lost_found_ino++;
2505                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2506                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2507                                   mode);
2508                 if (ret < 0) {
2509                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2510                                 dir_name, strerror(-ret));
2511                         goto out;
2512                 }
2513                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2514                                      namebuf, namelen, type, NULL, 1);
2515                 /*
2516                  * Add ".INO" suffix several times to handle case where
2517                  * "FILENAME.INO" is already taken by another file.
2518                  */
2519                 while (ret == -EEXIST) {
2520                         /*
2521                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2522                          */
2523                         if (namelen + count_digits(rec->ino) + 1 >
2524                             BTRFS_NAME_LEN) {
2525                                 ret = -EFBIG;
2526                                 goto out;
2527                         }
2528                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2529                                  ".%llu", rec->ino);
2530                         namelen += count_digits(rec->ino) + 1;
2531                         ret = btrfs_add_link(trans, root, rec->ino,
2532                                              lost_found_ino, namebuf,
2533                                              namelen, type, NULL, 1);
2534                 }
2535                 if (ret < 0) {
2536                         fprintf(stderr,
2537                                 "Failed to link the inode %llu to %s dir: %s\n",
2538                                 rec->ino, dir_name, strerror(-ret));
2539                         goto out;
2540                 }
2541                 /*
2542                  * Just increase the found_link, don't actually add the
2543                  * backref. This will make things easier and this inode
2544                  * record will be freed after the repair is done.
2545                  * So fsck will not report problem about this inode.
2546                  */
2547                 rec->found_link++;
2548                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2549                        namelen, namebuf, dir_name);
2550         }
2551         printf("Fixed the nlink of inode %llu\n", rec->ino);
2552 out:
2553         /*
2554          * Clear the flag anyway, or we will loop forever for the same inode
2555          * as it will not be removed from the bad inode list and the dead loop
2556          * happens.
2557          */
2558         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2559         btrfs_release_path(path);
2560         return ret;
2561 }
2562
2563 /*
2564  * Check if there is any normal(reg or prealloc) file extent for given
2565  * ino.
2566  * This is used to determine the file type when neither its dir_index/item or
2567  * inode_item exists.
2568  *
2569  * This will *NOT* report error, if any error happens, just consider it does
2570  * not have any normal file extent.
2571  */
2572 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2573 {
2574         struct btrfs_path *path;
2575         struct btrfs_key key;
2576         struct btrfs_key found_key;
2577         struct btrfs_file_extent_item *fi;
2578         u8 type;
2579         int ret = 0;
2580
2581         path = btrfs_alloc_path();
2582         if (!path)
2583                 goto out;
2584         key.objectid = ino;
2585         key.type = BTRFS_EXTENT_DATA_KEY;
2586         key.offset = 0;
2587
2588         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2589         if (ret < 0) {
2590                 ret = 0;
2591                 goto out;
2592         }
2593         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2594                 ret = btrfs_next_leaf(root, path);
2595                 if (ret) {
2596                         ret = 0;
2597                         goto out;
2598                 }
2599         }
2600         while (1) {
2601                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2602                                       path->slots[0]);
2603                 if (found_key.objectid != ino ||
2604                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2605                         break;
2606                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2607                                     struct btrfs_file_extent_item);
2608                 type = btrfs_file_extent_type(path->nodes[0], fi);
2609                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2610                         ret = 1;
2611                         goto out;
2612                 }
2613         }
2614 out:
2615         btrfs_free_path(path);
2616         return ret;
2617 }
2618
2619 static u32 btrfs_type_to_imode(u8 type)
2620 {
2621         static u32 imode_by_btrfs_type[] = {
2622                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2623                 [BTRFS_FT_DIR]          = S_IFDIR,
2624                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2625                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2626                 [BTRFS_FT_FIFO]         = S_IFIFO,
2627                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2628                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2629         };
2630
2631         return imode_by_btrfs_type[(type)];
2632 }
2633
2634 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2635                                 struct btrfs_root *root,
2636                                 struct btrfs_path *path,
2637                                 struct inode_record *rec)
2638 {
2639         u8 filetype;
2640         u32 mode = 0700;
2641         int type_recovered = 0;
2642         int ret = 0;
2643
2644         printf("Trying to rebuild inode:%llu\n", rec->ino);
2645
2646         type_recovered = !find_file_type(rec, &filetype);
2647
2648         /*
2649          * Try to determine inode type if type not found.
2650          *
2651          * For found regular file extent, it must be FILE.
2652          * For found dir_item/index, it must be DIR.
2653          *
2654          * For undetermined one, use FILE as fallback.
2655          *
2656          * TODO:
2657          * 1. If found backref(inode_index/item is already handled) to it,
2658          *    it must be DIR.
2659          *    Need new inode-inode ref structure to allow search for that.
2660          */
2661         if (!type_recovered) {
2662                 if (rec->found_file_extent &&
2663                     find_normal_file_extent(root, rec->ino)) {
2664                         type_recovered = 1;
2665                         filetype = BTRFS_FT_REG_FILE;
2666                 } else if (rec->found_dir_item) {
2667                         type_recovered = 1;
2668                         filetype = BTRFS_FT_DIR;
2669                 } else if (!list_empty(&rec->orphan_extents)) {
2670                         type_recovered = 1;
2671                         filetype = BTRFS_FT_REG_FILE;
2672                 } else{
2673                         printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2674                                rec->ino);
2675                         type_recovered = 1;
2676                         filetype = BTRFS_FT_REG_FILE;
2677                 }
2678         }
2679
2680         ret = btrfs_new_inode(trans, root, rec->ino,
2681                               mode | btrfs_type_to_imode(filetype));
2682         if (ret < 0)
2683                 goto out;
2684
2685         /*
2686          * Here inode rebuild is done, we only rebuild the inode item,
2687          * don't repair the nlink(like move to lost+found).
2688          * That is the job of nlink repair.
2689          *
2690          * We just fill the record and return
2691          */
2692         rec->found_dir_item = 1;
2693         rec->imode = mode | btrfs_type_to_imode(filetype);
2694         rec->nlink = 0;
2695         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2696         /* Ensure the inode_nlinks repair function will be called */
2697         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2698 out:
2699         return ret;
2700 }
2701
2702 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2703                                       struct btrfs_root *root,
2704                                       struct btrfs_path *path,
2705                                       struct inode_record *rec)
2706 {
2707         struct orphan_data_extent *orphan;
2708         struct orphan_data_extent *tmp;
2709         int ret = 0;
2710
2711         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2712                 /*
2713                  * Check for conflicting file extents
2714                  *
2715                  * Here we don't know whether the extents is compressed or not,
2716                  * so we can only assume it not compressed nor data offset,
2717                  * and use its disk_len as extent length.
2718                  */
2719                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2720                                        orphan->offset, orphan->disk_len, 0);
2721                 btrfs_release_path(path);
2722                 if (ret < 0)
2723                         goto out;
2724                 if (!ret) {
2725                         fprintf(stderr,
2726                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2727                                 orphan->disk_bytenr, orphan->disk_len);
2728                         ret = btrfs_free_extent(trans,
2729                                         root->fs_info->extent_root,
2730                                         orphan->disk_bytenr, orphan->disk_len,
2731                                         0, root->objectid, orphan->objectid,
2732                                         orphan->offset);
2733                         if (ret < 0)
2734                                 goto out;
2735                 }
2736                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2737                                 orphan->offset, orphan->disk_bytenr,
2738                                 orphan->disk_len, orphan->disk_len);
2739                 if (ret < 0)
2740                         goto out;
2741
2742                 /* Update file size info */
2743                 rec->found_size += orphan->disk_len;
2744                 if (rec->found_size == rec->nbytes)
2745                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2746
2747                 /* Update the file extent hole info too */
2748                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2749                                            orphan->disk_len);
2750                 if (ret < 0)
2751                         goto out;
2752                 if (RB_EMPTY_ROOT(&rec->holes))
2753                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2754
2755                 list_del(&orphan->list);
2756                 free(orphan);
2757         }
2758         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2759 out:
2760         return ret;
2761 }
2762
2763 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2764                                         struct btrfs_root *root,
2765                                         struct btrfs_path *path,
2766                                         struct inode_record *rec)
2767 {
2768         struct rb_node *node;
2769         struct file_extent_hole *hole;
2770         int found = 0;
2771         int ret = 0;
2772
2773         node = rb_first(&rec->holes);
2774
2775         while (node) {
2776                 found = 1;
2777                 hole = rb_entry(node, struct file_extent_hole, node);
2778                 ret = btrfs_punch_hole(trans, root, rec->ino,
2779                                        hole->start, hole->len);
2780                 if (ret < 0)
2781                         goto out;
2782                 ret = del_file_extent_hole(&rec->holes, hole->start,
2783                                            hole->len);
2784                 if (ret < 0)
2785                         goto out;
2786                 if (RB_EMPTY_ROOT(&rec->holes))
2787                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2788                 node = rb_first(&rec->holes);
2789         }
2790         /* special case for a file losing all its file extent */
2791         if (!found) {
2792                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2793                                        round_up(rec->isize, root->sectorsize));
2794                 if (ret < 0)
2795                         goto out;
2796         }
2797         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2798                rec->ino, root->objectid);
2799 out:
2800         return ret;
2801 }
2802
2803 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2804 {
2805         struct btrfs_trans_handle *trans;
2806         struct btrfs_path *path;
2807         int ret = 0;
2808
2809         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2810                              I_ERR_NO_ORPHAN_ITEM |
2811                              I_ERR_LINK_COUNT_WRONG |
2812                              I_ERR_NO_INODE_ITEM |
2813                              I_ERR_FILE_EXTENT_ORPHAN |
2814                              I_ERR_FILE_EXTENT_DISCOUNT|
2815                              I_ERR_FILE_NBYTES_WRONG)))
2816                 return rec->errors;
2817
2818         path = btrfs_alloc_path();
2819         if (!path)
2820                 return -ENOMEM;
2821
2822         /*
2823          * For nlink repair, it may create a dir and add link, so
2824          * 2 for parent(256)'s dir_index and dir_item
2825          * 2 for lost+found dir's inode_item and inode_ref
2826          * 1 for the new inode_ref of the file
2827          * 2 for lost+found dir's dir_index and dir_item for the file
2828          */
2829         trans = btrfs_start_transaction(root, 7);
2830         if (IS_ERR(trans)) {
2831                 btrfs_free_path(path);
2832                 return PTR_ERR(trans);
2833         }
2834
2835         if (rec->errors & I_ERR_NO_INODE_ITEM)
2836                 ret = repair_inode_no_item(trans, root, path, rec);
2837         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2838                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2839         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2840                 ret = repair_inode_discount_extent(trans, root, path, rec);
2841         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2842                 ret = repair_inode_isize(trans, root, path, rec);
2843         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2844                 ret = repair_inode_orphan_item(trans, root, path, rec);
2845         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2846                 ret = repair_inode_nlinks(trans, root, path, rec);
2847         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2848                 ret = repair_inode_nbytes(trans, root, path, rec);
2849         btrfs_commit_transaction(trans, root);
2850         btrfs_free_path(path);
2851         return ret;
2852 }
2853
2854 static int check_inode_recs(struct btrfs_root *root,
2855                             struct cache_tree *inode_cache)
2856 {
2857         struct cache_extent *cache;
2858         struct ptr_node *node;
2859         struct inode_record *rec;
2860         struct inode_backref *backref;
2861         int stage = 0;
2862         int ret = 0;
2863         int err = 0;
2864         u64 error = 0;
2865         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2866
2867         if (btrfs_root_refs(&root->root_item) == 0) {
2868                 if (!cache_tree_empty(inode_cache))
2869                         fprintf(stderr, "warning line %d\n", __LINE__);
2870                 return 0;
2871         }
2872
2873         /*
2874          * We need to record the highest inode number for later 'lost+found'
2875          * dir creation.
2876          * We must select a ino not used/refered by any existing inode, or
2877          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2878          * this may cause 'lost+found' dir has wrong nlinks.
2879          */
2880         cache = last_cache_extent(inode_cache);
2881         if (cache) {
2882                 node = container_of(cache, struct ptr_node, cache);
2883                 rec = node->data;
2884                 if (rec->ino > root->highest_inode)
2885                         root->highest_inode = rec->ino;
2886         }
2887
2888         /*
2889          * We need to repair backrefs first because we could change some of the
2890          * errors in the inode recs.
2891          *
2892          * We also need to go through and delete invalid backrefs first and then
2893          * add the correct ones second.  We do this because we may get EEXIST
2894          * when adding back the correct index because we hadn't yet deleted the
2895          * invalid index.
2896          *
2897          * For example, if we were missing a dir index then the directories
2898          * isize would be wrong, so if we fixed the isize to what we thought it
2899          * would be and then fixed the backref we'd still have a invalid fs, so
2900          * we need to add back the dir index and then check to see if the isize
2901          * is still wrong.
2902          */
2903         while (stage < 3) {
2904                 stage++;
2905                 if (stage == 3 && !err)
2906                         break;
2907
2908                 cache = search_cache_extent(inode_cache, 0);
2909                 while (repair && cache) {
2910                         node = container_of(cache, struct ptr_node, cache);
2911                         rec = node->data;
2912                         cache = next_cache_extent(cache);
2913
2914                         /* Need to free everything up and rescan */
2915                         if (stage == 3) {
2916                                 remove_cache_extent(inode_cache, &node->cache);
2917                                 free(node);
2918                                 free_inode_rec(rec);
2919                                 continue;
2920                         }
2921
2922                         if (list_empty(&rec->backrefs))
2923                                 continue;
2924
2925                         ret = repair_inode_backrefs(root, rec, inode_cache,
2926                                                     stage == 1);
2927                         if (ret < 0) {
2928                                 err = ret;
2929                                 stage = 2;
2930                                 break;
2931                         } if (ret > 0) {
2932                                 err = -EAGAIN;
2933                         }
2934                 }
2935         }
2936         if (err)
2937                 return err;
2938
2939         rec = get_inode_rec(inode_cache, root_dirid, 0);
2940         BUG_ON(IS_ERR(rec));
2941         if (rec) {
2942                 ret = check_root_dir(rec);
2943                 if (ret) {
2944                         fprintf(stderr, "root %llu root dir %llu error\n",
2945                                 (unsigned long long)root->root_key.objectid,
2946                                 (unsigned long long)root_dirid);
2947                         print_inode_error(root, rec);
2948                         error++;
2949                 }
2950         } else {
2951                 if (repair) {
2952                         struct btrfs_trans_handle *trans;
2953
2954                         trans = btrfs_start_transaction(root, 1);
2955                         if (IS_ERR(trans)) {
2956                                 err = PTR_ERR(trans);
2957                                 return err;
2958                         }
2959
2960                         fprintf(stderr,
2961                                 "root %llu missing its root dir, recreating\n",
2962                                 (unsigned long long)root->objectid);
2963
2964                         ret = btrfs_make_root_dir(trans, root, root_dirid);
2965                         BUG_ON(ret);
2966
2967                         btrfs_commit_transaction(trans, root);
2968                         return -EAGAIN;
2969                 }
2970
2971                 fprintf(stderr, "root %llu root dir %llu not found\n",
2972                         (unsigned long long)root->root_key.objectid,
2973                         (unsigned long long)root_dirid);
2974         }
2975
2976         while (1) {
2977                 cache = search_cache_extent(inode_cache, 0);
2978                 if (!cache)
2979                         break;
2980                 node = container_of(cache, struct ptr_node, cache);
2981                 rec = node->data;
2982                 remove_cache_extent(inode_cache, &node->cache);
2983                 free(node);
2984                 if (rec->ino == root_dirid ||
2985                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
2986                         free_inode_rec(rec);
2987                         continue;
2988                 }
2989
2990                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2991                         ret = check_orphan_item(root, rec->ino);
2992                         if (ret == 0)
2993                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2994                         if (can_free_inode_rec(rec)) {
2995                                 free_inode_rec(rec);
2996                                 continue;
2997                         }
2998                 }
2999
3000                 if (!rec->found_inode_item)
3001                         rec->errors |= I_ERR_NO_INODE_ITEM;
3002                 if (rec->found_link != rec->nlink)
3003                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3004                 if (repair) {
3005                         ret = try_repair_inode(root, rec);
3006                         if (ret == 0 && can_free_inode_rec(rec)) {
3007                                 free_inode_rec(rec);
3008                                 continue;
3009                         }
3010                         ret = 0;
3011                 }
3012
3013                 if (!(repair && ret == 0))
3014                         error++;
3015                 print_inode_error(root, rec);
3016                 list_for_each_entry(backref, &rec->backrefs, list) {
3017                         if (!backref->found_dir_item)
3018                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3019                         if (!backref->found_dir_index)
3020                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3021                         if (!backref->found_inode_ref)
3022                                 backref->errors |= REF_ERR_NO_INODE_REF;
3023                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3024                                 " namelen %u name %s filetype %d errors %x",
3025                                 (unsigned long long)backref->dir,
3026                                 (unsigned long long)backref->index,
3027                                 backref->namelen, backref->name,
3028                                 backref->filetype, backref->errors);
3029                         print_ref_error(backref->errors);
3030                 }
3031                 free_inode_rec(rec);
3032         }
3033         return (error > 0) ? -1 : 0;
3034 }
3035
3036 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3037                                         u64 objectid)
3038 {
3039         struct cache_extent *cache;
3040         struct root_record *rec = NULL;
3041         int ret;
3042
3043         cache = lookup_cache_extent(root_cache, objectid, 1);
3044         if (cache) {
3045                 rec = container_of(cache, struct root_record, cache);
3046         } else {
3047                 rec = calloc(1, sizeof(*rec));
3048                 if (!rec)
3049                         return ERR_PTR(-ENOMEM);
3050                 rec->objectid = objectid;
3051                 INIT_LIST_HEAD(&rec->backrefs);
3052                 rec->cache.start = objectid;
3053                 rec->cache.size = 1;
3054
3055                 ret = insert_cache_extent(root_cache, &rec->cache);
3056                 if (ret)
3057                         return ERR_PTR(-EEXIST);
3058         }
3059         return rec;
3060 }
3061
3062 static struct root_backref *get_root_backref(struct root_record *rec,
3063                                              u64 ref_root, u64 dir, u64 index,
3064                                              const char *name, int namelen)
3065 {
3066         struct root_backref *backref;
3067
3068         list_for_each_entry(backref, &rec->backrefs, list) {
3069                 if (backref->ref_root != ref_root || backref->dir != dir ||
3070                     backref->namelen != namelen)
3071                         continue;
3072                 if (memcmp(name, backref->name, namelen))
3073                         continue;
3074                 return backref;
3075         }
3076
3077         backref = calloc(1, sizeof(*backref) + namelen + 1);
3078         if (!backref)
3079                 return NULL;
3080         backref->ref_root = ref_root;
3081         backref->dir = dir;
3082         backref->index = index;
3083         backref->namelen = namelen;
3084         memcpy(backref->name, name, namelen);
3085         backref->name[namelen] = '\0';
3086         list_add_tail(&backref->list, &rec->backrefs);
3087         return backref;
3088 }
3089
3090 static void free_root_record(struct cache_extent *cache)
3091 {
3092         struct root_record *rec;
3093         struct root_backref *backref;
3094
3095         rec = container_of(cache, struct root_record, cache);
3096         while (!list_empty(&rec->backrefs)) {
3097                 backref = list_entry(rec->backrefs.next,
3098                                      struct root_backref, list);
3099                 list_del(&backref->list);
3100                 free(backref);
3101         }
3102
3103         kfree(rec);
3104 }
3105
3106 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3107
3108 static int add_root_backref(struct cache_tree *root_cache,
3109                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3110                             const char *name, int namelen,
3111                             int item_type, int errors)
3112 {
3113         struct root_record *rec;
3114         struct root_backref *backref;
3115
3116         rec = get_root_rec(root_cache, root_id);
3117         BUG_ON(IS_ERR(rec));
3118         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3119         BUG_ON(!backref);
3120
3121         backref->errors |= errors;
3122
3123         if (item_type != BTRFS_DIR_ITEM_KEY) {
3124                 if (backref->found_dir_index || backref->found_back_ref ||
3125                     backref->found_forward_ref) {
3126                         if (backref->index != index)
3127                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3128                 } else {
3129                         backref->index = index;
3130                 }
3131         }
3132
3133         if (item_type == BTRFS_DIR_ITEM_KEY) {
3134                 if (backref->found_forward_ref)
3135                         rec->found_ref++;
3136                 backref->found_dir_item = 1;
3137         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3138                 backref->found_dir_index = 1;
3139         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3140                 if (backref->found_forward_ref)
3141                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3142                 else if (backref->found_dir_item)
3143                         rec->found_ref++;
3144                 backref->found_forward_ref = 1;
3145         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3146                 if (backref->found_back_ref)
3147                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3148                 backref->found_back_ref = 1;
3149         } else {
3150                 BUG_ON(1);
3151         }
3152
3153         if (backref->found_forward_ref && backref->found_dir_item)
3154                 backref->reachable = 1;
3155         return 0;
3156 }
3157
3158 static int merge_root_recs(struct btrfs_root *root,
3159                            struct cache_tree *src_cache,
3160                            struct cache_tree *dst_cache)
3161 {
3162         struct cache_extent *cache;
3163         struct ptr_node *node;
3164         struct inode_record *rec;
3165         struct inode_backref *backref;
3166         int ret = 0;
3167
3168         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3169                 free_inode_recs_tree(src_cache);
3170                 return 0;
3171         }
3172
3173         while (1) {
3174                 cache = search_cache_extent(src_cache, 0);
3175                 if (!cache)
3176                         break;
3177                 node = container_of(cache, struct ptr_node, cache);
3178                 rec = node->data;
3179                 remove_cache_extent(src_cache, &node->cache);
3180                 free(node);
3181
3182                 ret = is_child_root(root, root->objectid, rec->ino);
3183                 if (ret < 0)
3184                         break;
3185                 else if (ret == 0)
3186                         goto skip;
3187
3188                 list_for_each_entry(backref, &rec->backrefs, list) {
3189                         BUG_ON(backref->found_inode_ref);
3190                         if (backref->found_dir_item)
3191                                 add_root_backref(dst_cache, rec->ino,
3192                                         root->root_key.objectid, backref->dir,
3193                                         backref->index, backref->name,
3194                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3195                                         backref->errors);
3196                         if (backref->found_dir_index)
3197                                 add_root_backref(dst_cache, rec->ino,
3198                                         root->root_key.objectid, backref->dir,
3199                                         backref->index, backref->name,
3200                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3201                                         backref->errors);
3202                 }
3203 skip:
3204                 free_inode_rec(rec);
3205         }
3206         if (ret < 0)
3207                 return ret;
3208         return 0;
3209 }
3210
3211 static int check_root_refs(struct btrfs_root *root,
3212                            struct cache_tree *root_cache)
3213 {
3214         struct root_record *rec;
3215         struct root_record *ref_root;
3216         struct root_backref *backref;
3217         struct cache_extent *cache;
3218         int loop = 1;
3219         int ret;
3220         int error;
3221         int errors = 0;
3222
3223         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3224         BUG_ON(IS_ERR(rec));
3225         rec->found_ref = 1;
3226
3227         /* fixme: this can not detect circular references */
3228         while (loop) {
3229                 loop = 0;
3230                 cache = search_cache_extent(root_cache, 0);
3231                 while (1) {
3232                         if (!cache)
3233                                 break;
3234                         rec = container_of(cache, struct root_record, cache);
3235                         cache = next_cache_extent(cache);
3236
3237                         if (rec->found_ref == 0)
3238                                 continue;
3239
3240                         list_for_each_entry(backref, &rec->backrefs, list) {
3241                                 if (!backref->reachable)
3242                                         continue;
3243
3244                                 ref_root = get_root_rec(root_cache,
3245                                                         backref->ref_root);
3246                                 BUG_ON(IS_ERR(ref_root));
3247                                 if (ref_root->found_ref > 0)
3248                                         continue;
3249
3250                                 backref->reachable = 0;
3251                                 rec->found_ref--;
3252                                 if (rec->found_ref == 0)
3253                                         loop = 1;
3254                         }
3255                 }
3256         }
3257
3258         cache = search_cache_extent(root_cache, 0);
3259         while (1) {
3260                 if (!cache)
3261                         break;
3262                 rec = container_of(cache, struct root_record, cache);
3263                 cache = next_cache_extent(cache);
3264
3265                 if (rec->found_ref == 0 &&
3266                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3267                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3268                         ret = check_orphan_item(root->fs_info->tree_root,
3269                                                 rec->objectid);
3270                         if (ret == 0)
3271                                 continue;
3272
3273                         /*
3274                          * If we don't have a root item then we likely just have
3275                          * a dir item in a snapshot for this root but no actual
3276                          * ref key or anything so it's meaningless.
3277                          */
3278                         if (!rec->found_root_item)
3279                                 continue;
3280                         errors++;
3281                         fprintf(stderr, "fs tree %llu not referenced\n",
3282                                 (unsigned long long)rec->objectid);
3283                 }
3284
3285                 error = 0;
3286                 if (rec->found_ref > 0 && !rec->found_root_item)
3287                         error = 1;
3288                 list_for_each_entry(backref, &rec->backrefs, list) {
3289                         if (!backref->found_dir_item)
3290                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3291                         if (!backref->found_dir_index)
3292                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3293                         if (!backref->found_back_ref)
3294                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3295                         if (!backref->found_forward_ref)
3296                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3297                         if (backref->reachable && backref->errors)
3298                                 error = 1;
3299                 }
3300                 if (!error)
3301                         continue;
3302
3303                 errors++;
3304                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3305                         (unsigned long long)rec->objectid, rec->found_ref,
3306                          rec->found_root_item ? "" : "not found");
3307
3308                 list_for_each_entry(backref, &rec->backrefs, list) {
3309                         if (!backref->reachable)
3310                                 continue;
3311                         if (!backref->errors && rec->found_root_item)
3312                                 continue;
3313                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3314                                 " index %llu namelen %u name %s errors %x\n",
3315                                 (unsigned long long)backref->ref_root,
3316                                 (unsigned long long)backref->dir,
3317                                 (unsigned long long)backref->index,
3318                                 backref->namelen, backref->name,
3319                                 backref->errors);
3320                         print_ref_error(backref->errors);
3321                 }
3322         }
3323         return errors > 0 ? 1 : 0;
3324 }
3325
3326 static int process_root_ref(struct extent_buffer *eb, int slot,
3327                             struct btrfs_key *key,
3328                             struct cache_tree *root_cache)
3329 {
3330         u64 dirid;
3331         u64 index;
3332         u32 len;
3333         u32 name_len;
3334         struct btrfs_root_ref *ref;
3335         char namebuf[BTRFS_NAME_LEN];
3336         int error;
3337
3338         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3339
3340         dirid = btrfs_root_ref_dirid(eb, ref);
3341         index = btrfs_root_ref_sequence(eb, ref);
3342         name_len = btrfs_root_ref_name_len(eb, ref);
3343
3344         if (name_len <= BTRFS_NAME_LEN) {
3345                 len = name_len;
3346                 error = 0;
3347         } else {
3348                 len = BTRFS_NAME_LEN;
3349                 error = REF_ERR_NAME_TOO_LONG;
3350         }
3351         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3352
3353         if (key->type == BTRFS_ROOT_REF_KEY) {
3354                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3355                                  index, namebuf, len, key->type, error);
3356         } else {
3357                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3358                                  index, namebuf, len, key->type, error);
3359         }
3360         return 0;
3361 }
3362
3363 static void free_corrupt_block(struct cache_extent *cache)
3364 {
3365         struct btrfs_corrupt_block *corrupt;
3366
3367         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3368         free(corrupt);
3369 }
3370
3371 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3372
3373 /*
3374  * Repair the btree of the given root.
3375  *
3376  * The fix is to remove the node key in corrupt_blocks cache_tree.
3377  * and rebalance the tree.
3378  * After the fix, the btree should be writeable.
3379  */
3380 static int repair_btree(struct btrfs_root *root,
3381                         struct cache_tree *corrupt_blocks)
3382 {
3383         struct btrfs_trans_handle *trans;
3384         struct btrfs_path *path;
3385         struct btrfs_corrupt_block *corrupt;
3386         struct cache_extent *cache;
3387         struct btrfs_key key;
3388         u64 offset;
3389         int level;
3390         int ret = 0;
3391
3392         if (cache_tree_empty(corrupt_blocks))
3393                 return 0;
3394
3395         path = btrfs_alloc_path();
3396         if (!path)
3397                 return -ENOMEM;
3398
3399         trans = btrfs_start_transaction(root, 1);
3400         if (IS_ERR(trans)) {
3401                 ret = PTR_ERR(trans);
3402                 fprintf(stderr, "Error starting transaction: %s\n",
3403                         strerror(-ret));
3404                 goto out_free_path;
3405         }
3406         cache = first_cache_extent(corrupt_blocks);
3407         while (cache) {
3408                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3409                                        cache);
3410                 level = corrupt->level;
3411                 path->lowest_level = level;
3412                 key.objectid = corrupt->key.objectid;
3413                 key.type = corrupt->key.type;
3414                 key.offset = corrupt->key.offset;
3415
3416                 /*
3417                  * Here we don't want to do any tree balance, since it may
3418                  * cause a balance with corrupted brother leaf/node,
3419                  * so ins_len set to 0 here.
3420                  * Balance will be done after all corrupt node/leaf is deleted.
3421                  */
3422                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3423                 if (ret < 0)
3424                         goto out;
3425                 offset = btrfs_node_blockptr(path->nodes[level],
3426                                              path->slots[level]);
3427
3428                 /* Remove the ptr */
3429                 ret = btrfs_del_ptr(trans, root, path, level,
3430                                     path->slots[level]);
3431                 if (ret < 0)
3432                         goto out;
3433                 /*
3434                  * Remove the corresponding extent
3435                  * return value is not concerned.
3436                  */
3437                 btrfs_release_path(path);
3438                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3439                                         0, root->root_key.objectid,
3440                                         level - 1, 0);
3441                 cache = next_cache_extent(cache);
3442         }
3443
3444         /* Balance the btree using btrfs_search_slot() */
3445         cache = first_cache_extent(corrupt_blocks);
3446         while (cache) {
3447                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3448                                        cache);
3449                 memcpy(&key, &corrupt->key, sizeof(key));
3450                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3451                 if (ret < 0)
3452                         goto out;
3453                 /* return will always >0 since it won't find the item */
3454                 ret = 0;
3455                 btrfs_release_path(path);
3456                 cache = next_cache_extent(cache);
3457         }
3458 out:
3459         btrfs_commit_transaction(trans, root);
3460 out_free_path:
3461         btrfs_free_path(path);
3462         return ret;
3463 }
3464
3465 static int check_fs_root(struct btrfs_root *root,
3466                          struct cache_tree *root_cache,
3467                          struct walk_control *wc)
3468 {
3469         int ret = 0;
3470         int err = 0;
3471         int wret;
3472         int level;
3473         struct btrfs_path path;
3474         struct shared_node root_node;
3475         struct root_record *rec;
3476         struct btrfs_root_item *root_item = &root->root_item;
3477         struct cache_tree corrupt_blocks;
3478         struct orphan_data_extent *orphan;
3479         struct orphan_data_extent *tmp;
3480         enum btrfs_tree_block_status status;
3481
3482         /*
3483          * Reuse the corrupt_block cache tree to record corrupted tree block
3484          *
3485          * Unlike the usage in extent tree check, here we do it in a per
3486          * fs/subvol tree base.
3487          */
3488         cache_tree_init(&corrupt_blocks);
3489         root->fs_info->corrupt_blocks = &corrupt_blocks;
3490
3491         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3492                 rec = get_root_rec(root_cache, root->root_key.objectid);
3493                 BUG_ON(IS_ERR(rec));
3494                 if (btrfs_root_refs(root_item) > 0)
3495                         rec->found_root_item = 1;
3496         }
3497
3498         btrfs_init_path(&path);
3499         memset(&root_node, 0, sizeof(root_node));
3500         cache_tree_init(&root_node.root_cache);
3501         cache_tree_init(&root_node.inode_cache);
3502
3503         /* Move the orphan extent record to corresponding inode_record */
3504         list_for_each_entry_safe(orphan, tmp,
3505                                  &root->orphan_data_extents, list) {
3506                 struct inode_record *inode;
3507
3508                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3509                                       1);
3510                 BUG_ON(IS_ERR(inode));
3511                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3512                 list_move(&orphan->list, &inode->orphan_extents);
3513         }
3514
3515         level = btrfs_header_level(root->node);
3516         memset(wc->nodes, 0, sizeof(wc->nodes));
3517         wc->nodes[level] = &root_node;
3518         wc->active_node = level;
3519         wc->root_level = level;
3520
3521         /* We may not have checked the root block, lets do that now */
3522         if (btrfs_is_leaf(root->node))
3523                 status = btrfs_check_leaf(root, NULL, root->node);
3524         else
3525                 status = btrfs_check_node(root, NULL, root->node);
3526         if (status != BTRFS_TREE_BLOCK_CLEAN)
3527                 return -EIO;
3528
3529         if (btrfs_root_refs(root_item) > 0 ||
3530             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3531                 path.nodes[level] = root->node;
3532                 extent_buffer_get(root->node);
3533                 path.slots[level] = 0;
3534         } else {
3535                 struct btrfs_key key;
3536                 struct btrfs_disk_key found_key;
3537
3538                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3539                 level = root_item->drop_level;
3540                 path.lowest_level = level;
3541                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3542                 if (wret < 0)
3543                         goto skip_walking;
3544                 btrfs_node_key(path.nodes[level], &found_key,
3545                                 path.slots[level]);
3546                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3547                                         sizeof(found_key)));
3548         }
3549
3550         while (1) {
3551                 wret = walk_down_tree(root, &path, wc, &level);
3552                 if (wret < 0)
3553                         ret = wret;
3554                 if (wret != 0)
3555                         break;
3556
3557                 wret = walk_up_tree(root, &path, wc, &level);
3558                 if (wret < 0)
3559                         ret = wret;
3560                 if (wret != 0)
3561                         break;
3562         }
3563 skip_walking:
3564         btrfs_release_path(&path);
3565
3566         if (!cache_tree_empty(&corrupt_blocks)) {
3567                 struct cache_extent *cache;
3568                 struct btrfs_corrupt_block *corrupt;
3569
3570                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3571                        root->root_key.objectid);
3572                 cache = first_cache_extent(&corrupt_blocks);
3573                 while (cache) {
3574                         corrupt = container_of(cache,
3575                                                struct btrfs_corrupt_block,
3576                                                cache);
3577                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3578                                cache->start, corrupt->level,
3579                                corrupt->key.objectid, corrupt->key.type,
3580                                corrupt->key.offset);
3581                         cache = next_cache_extent(cache);
3582                 }
3583                 if (repair) {
3584                         printf("Try to repair the btree for root %llu\n",
3585                                root->root_key.objectid);
3586                         ret = repair_btree(root, &corrupt_blocks);
3587                         if (ret < 0)
3588                                 fprintf(stderr, "Failed to repair btree: %s\n",
3589                                         strerror(-ret));
3590                         if (!ret)
3591                                 printf("Btree for root %llu is fixed\n",
3592                                        root->root_key.objectid);
3593                 }
3594         }
3595
3596         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3597         if (err < 0)
3598                 ret = err;
3599
3600         if (root_node.current) {
3601                 root_node.current->checked = 1;
3602                 maybe_free_inode_rec(&root_node.inode_cache,
3603                                 root_node.current);
3604         }
3605
3606         err = check_inode_recs(root, &root_node.inode_cache);
3607         if (!ret)
3608                 ret = err;
3609
3610         free_corrupt_blocks_tree(&corrupt_blocks);
3611         root->fs_info->corrupt_blocks = NULL;
3612         free_orphan_data_extents(&root->orphan_data_extents);
3613         return ret;
3614 }
3615
3616 static int fs_root_objectid(u64 objectid)
3617 {
3618         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3619             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3620                 return 1;
3621         return is_fstree(objectid);
3622 }
3623
3624 static int check_fs_roots(struct btrfs_root *root,
3625                           struct cache_tree *root_cache)
3626 {
3627         struct btrfs_path path;
3628         struct btrfs_key key;
3629         struct walk_control wc;
3630         struct extent_buffer *leaf, *tree_node;
3631         struct btrfs_root *tmp_root;
3632         struct btrfs_root *tree_root = root->fs_info->tree_root;
3633         int ret;
3634         int err = 0;
3635
3636         if (ctx.progress_enabled) {
3637                 ctx.tp = TASK_FS_ROOTS;
3638                 task_start(ctx.info);
3639         }
3640
3641         /*
3642          * Just in case we made any changes to the extent tree that weren't
3643          * reflected into the free space cache yet.
3644          */
3645         if (repair)
3646                 reset_cached_block_groups(root->fs_info);
3647         memset(&wc, 0, sizeof(wc));
3648         cache_tree_init(&wc.shared);
3649         btrfs_init_path(&path);
3650
3651 again:
3652         key.offset = 0;
3653         key.objectid = 0;
3654         key.type = BTRFS_ROOT_ITEM_KEY;
3655         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3656         if (ret < 0) {
3657                 err = 1;
3658                 goto out;
3659         }
3660         tree_node = tree_root->node;
3661         while (1) {
3662                 if (tree_node != tree_root->node) {
3663                         free_root_recs_tree(root_cache);
3664                         btrfs_release_path(&path);
3665                         goto again;
3666                 }
3667                 leaf = path.nodes[0];
3668                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3669                         ret = btrfs_next_leaf(tree_root, &path);
3670                         if (ret) {
3671                                 if (ret < 0)
3672                                         err = 1;
3673                                 break;
3674                         }
3675                         leaf = path.nodes[0];
3676                 }
3677                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3678                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3679                     fs_root_objectid(key.objectid)) {
3680                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3681                                 tmp_root = btrfs_read_fs_root_no_cache(
3682                                                 root->fs_info, &key);
3683                         } else {
3684                                 key.offset = (u64)-1;
3685                                 tmp_root = btrfs_read_fs_root(
3686                                                 root->fs_info, &key);
3687                         }
3688                         if (IS_ERR(tmp_root)) {
3689                                 err = 1;
3690                                 goto next;
3691                         }
3692                         ret = check_fs_root(tmp_root, root_cache, &wc);
3693                         if (ret == -EAGAIN) {
3694                                 free_root_recs_tree(root_cache);
3695                                 btrfs_release_path(&path);
3696                                 goto again;
3697                         }
3698                         if (ret)
3699                                 err = 1;
3700                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3701                                 btrfs_free_fs_root(tmp_root);
3702                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3703                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3704                         process_root_ref(leaf, path.slots[0], &key,
3705                                          root_cache);
3706                 }
3707 next:
3708                 path.slots[0]++;
3709         }
3710 out:
3711         btrfs_release_path(&path);
3712         if (err)
3713                 free_extent_cache_tree(&wc.shared);
3714         if (!cache_tree_empty(&wc.shared))
3715                 fprintf(stderr, "warning line %d\n", __LINE__);
3716
3717         task_stop(ctx.info);
3718
3719         return err;
3720 }
3721
3722 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3723 {
3724         struct list_head *cur = rec->backrefs.next;
3725         struct extent_backref *back;
3726         struct tree_backref *tback;
3727         struct data_backref *dback;
3728         u64 found = 0;
3729         int err = 0;
3730
3731         while(cur != &rec->backrefs) {
3732                 back = list_entry(cur, struct extent_backref, list);
3733                 cur = cur->next;
3734                 if (!back->found_extent_tree) {
3735                         err = 1;
3736                         if (!print_errs)
3737                                 goto out;
3738                         if (back->is_data) {
3739                                 dback = (struct data_backref *)back;
3740                                 fprintf(stderr, "Backref %llu %s %llu"
3741                                         " owner %llu offset %llu num_refs %lu"
3742                                         " not found in extent tree\n",
3743                                         (unsigned long long)rec->start,
3744                                         back->full_backref ?
3745                                         "parent" : "root",
3746                                         back->full_backref ?
3747                                         (unsigned long long)dback->parent:
3748                                         (unsigned long long)dback->root,
3749                                         (unsigned long long)dback->owner,
3750                                         (unsigned long long)dback->offset,
3751                                         (unsigned long)dback->num_refs);
3752                         } else {
3753                                 tback = (struct tree_backref *)back;
3754                                 fprintf(stderr, "Backref %llu parent %llu"
3755                                         " root %llu not found in extent tree\n",
3756                                         (unsigned long long)rec->start,
3757                                         (unsigned long long)tback->parent,
3758                                         (unsigned long long)tback->root);
3759                         }
3760                 }
3761                 if (!back->is_data && !back->found_ref) {
3762                         err = 1;
3763                         if (!print_errs)
3764                                 goto out;
3765                         tback = (struct tree_backref *)back;
3766                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3767                                 (unsigned long long)rec->start,
3768                                 back->full_backref ? "parent" : "root",
3769                                 back->full_backref ?
3770                                 (unsigned long long)tback->parent :
3771                                 (unsigned long long)tback->root, back);
3772                 }
3773                 if (back->is_data) {
3774                         dback = (struct data_backref *)back;
3775                         if (dback->found_ref != dback->num_refs) {
3776                                 err = 1;
3777                                 if (!print_errs)
3778                                         goto out;
3779                                 fprintf(stderr, "Incorrect local backref count"
3780                                         " on %llu %s %llu owner %llu"
3781                                         " offset %llu found %u wanted %u back %p\n",
3782                                         (unsigned long long)rec->start,
3783                                         back->full_backref ?
3784                                         "parent" : "root",
3785                                         back->full_backref ?
3786                                         (unsigned long long)dback->parent:
3787                                         (unsigned long long)dback->root,
3788                                         (unsigned long long)dback->owner,
3789                                         (unsigned long long)dback->offset,
3790                                         dback->found_ref, dback->num_refs, back);
3791                         }
3792                         if (dback->disk_bytenr != rec->start) {
3793                                 err = 1;
3794                                 if (!print_errs)
3795                                         goto out;
3796                                 fprintf(stderr, "Backref disk bytenr does not"
3797                                         " match extent record, bytenr=%llu, "
3798                                         "ref bytenr=%llu\n",
3799                                         (unsigned long long)rec->start,
3800                                         (unsigned long long)dback->disk_bytenr);
3801                         }
3802
3803                         if (dback->bytes != rec->nr) {
3804                                 err = 1;
3805                                 if (!print_errs)
3806                                         goto out;
3807                                 fprintf(stderr, "Backref bytes do not match "
3808                                         "extent backref, bytenr=%llu, ref "
3809                                         "bytes=%llu, backref bytes=%llu\n",
3810                                         (unsigned long long)rec->start,
3811                                         (unsigned long long)rec->nr,
3812                                         (unsigned long long)dback->bytes);
3813                         }
3814                 }
3815                 if (!back->is_data) {
3816                         found += 1;
3817                 } else {
3818                         dback = (struct data_backref *)back;
3819                         found += dback->found_ref;
3820                 }
3821         }
3822         if (found != rec->refs) {
3823                 err = 1;
3824                 if (!print_errs)
3825                         goto out;
3826                 fprintf(stderr, "Incorrect global backref count "
3827                         "on %llu found %llu wanted %llu\n",
3828                         (unsigned long long)rec->start,
3829                         (unsigned long long)found,
3830                         (unsigned long long)rec->refs);
3831         }
3832 out:
3833         return err;
3834 }
3835
3836 static int free_all_extent_backrefs(struct extent_record *rec)
3837 {
3838         struct extent_backref *back;
3839         struct list_head *cur;
3840         while (!list_empty(&rec->backrefs)) {
3841                 cur = rec->backrefs.next;
3842                 back = list_entry(cur, struct extent_backref, list);
3843                 list_del(cur);
3844                 free(back);
3845         }
3846         return 0;
3847 }
3848
3849 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3850                                      struct cache_tree *extent_cache)
3851 {
3852         struct cache_extent *cache;
3853         struct extent_record *rec;
3854
3855         while (1) {
3856                 cache = first_cache_extent(extent_cache);
3857                 if (!cache)
3858                         break;
3859                 rec = container_of(cache, struct extent_record, cache);
3860                 remove_cache_extent(extent_cache, cache);
3861                 free_all_extent_backrefs(rec);
3862                 free(rec);
3863         }
3864 }
3865
3866 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3867                                  struct extent_record *rec)
3868 {
3869         if (rec->content_checked && rec->owner_ref_checked &&
3870             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3871             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3872             !rec->bad_full_backref && !rec->crossing_stripes &&
3873             !rec->wrong_chunk_type) {
3874                 remove_cache_extent(extent_cache, &rec->cache);
3875                 free_all_extent_backrefs(rec);
3876                 list_del_init(&rec->list);
3877                 free(rec);
3878         }
3879         return 0;
3880 }
3881
3882 static int check_owner_ref(struct btrfs_root *root,
3883                             struct extent_record *rec,
3884                             struct extent_buffer *buf)
3885 {
3886         struct extent_backref *node;
3887         struct tree_backref *back;
3888         struct btrfs_root *ref_root;
3889         struct btrfs_key key;
3890         struct btrfs_path path;
3891         struct extent_buffer *parent;
3892         int level;
3893         int found = 0;
3894         int ret;
3895
3896         list_for_each_entry(node, &rec->backrefs, list) {
3897                 if (node->is_data)
3898                         continue;
3899                 if (!node->found_ref)
3900                         continue;
3901                 if (node->full_backref)
3902                         continue;
3903                 back = (struct tree_backref *)node;
3904                 if (btrfs_header_owner(buf) == back->root)
3905                         return 0;
3906         }
3907         BUG_ON(rec->is_root);
3908
3909         /* try to find the block by search corresponding fs tree */
3910         key.objectid = btrfs_header_owner(buf);
3911         key.type = BTRFS_ROOT_ITEM_KEY;
3912         key.offset = (u64)-1;
3913
3914         ref_root = btrfs_read_fs_root(root->fs_info, &key);
3915         if (IS_ERR(ref_root))
3916                 return 1;
3917
3918         level = btrfs_header_level(buf);
3919         if (level == 0)
3920                 btrfs_item_key_to_cpu(buf, &key, 0);
3921         else
3922                 btrfs_node_key_to_cpu(buf, &key, 0);
3923
3924         btrfs_init_path(&path);
3925         path.lowest_level = level + 1;
3926         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3927         if (ret < 0)
3928                 return 0;
3929
3930         parent = path.nodes[level + 1];
3931         if (parent && buf->start == btrfs_node_blockptr(parent,
3932                                                         path.slots[level + 1]))
3933                 found = 1;
3934
3935         btrfs_release_path(&path);
3936         return found ? 0 : 1;
3937 }
3938
3939 static int is_extent_tree_record(struct extent_record *rec)
3940 {
3941         struct list_head *cur = rec->backrefs.next;
3942         struct extent_backref *node;
3943         struct tree_backref *back;
3944         int is_extent = 0;
3945
3946         while(cur != &rec->backrefs) {
3947                 node = list_entry(cur, struct extent_backref, list);
3948                 cur = cur->next;
3949                 if (node->is_data)
3950                         return 0;
3951                 back = (struct tree_backref *)node;
3952                 if (node->full_backref)
3953                         return 0;
3954                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3955                         is_extent = 1;
3956         }
3957         return is_extent;
3958 }
3959
3960
3961 static int record_bad_block_io(struct btrfs_fs_info *info,
3962                                struct cache_tree *extent_cache,
3963                                u64 start, u64 len)
3964 {
3965         struct extent_record *rec;
3966         struct cache_extent *cache;
3967         struct btrfs_key key;
3968
3969         cache = lookup_cache_extent(extent_cache, start, len);
3970         if (!cache)
3971                 return 0;
3972
3973         rec = container_of(cache, struct extent_record, cache);
3974         if (!is_extent_tree_record(rec))
3975                 return 0;
3976
3977         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3978         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
3979 }
3980
3981 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3982                        struct extent_buffer *buf, int slot)
3983 {
3984         if (btrfs_header_level(buf)) {
3985                 struct btrfs_key_ptr ptr1, ptr2;
3986
3987                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3988                                    sizeof(struct btrfs_key_ptr));
3989                 read_extent_buffer(buf, &ptr2,
3990                                    btrfs_node_key_ptr_offset(slot + 1),
3991                                    sizeof(struct btrfs_key_ptr));
3992                 write_extent_buffer(buf, &ptr1,
3993                                     btrfs_node_key_ptr_offset(slot + 1),
3994                                     sizeof(struct btrfs_key_ptr));
3995                 write_extent_buffer(buf, &ptr2,
3996                                     btrfs_node_key_ptr_offset(slot),
3997                                     sizeof(struct btrfs_key_ptr));
3998                 if (slot == 0) {
3999                         struct btrfs_disk_key key;
4000                         btrfs_node_key(buf, &key, 0);
4001                         btrfs_fixup_low_keys(root, path, &key,
4002                                              btrfs_header_level(buf) + 1);
4003                 }
4004         } else {
4005                 struct btrfs_item *item1, *item2;
4006                 struct btrfs_key k1, k2;
4007                 char *item1_data, *item2_data;
4008                 u32 item1_offset, item2_offset, item1_size, item2_size;
4009
4010                 item1 = btrfs_item_nr(slot);
4011                 item2 = btrfs_item_nr(slot + 1);
4012                 btrfs_item_key_to_cpu(buf, &k1, slot);
4013                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4014                 item1_offset = btrfs_item_offset(buf, item1);
4015                 item2_offset = btrfs_item_offset(buf, item2);
4016                 item1_size = btrfs_item_size(buf, item1);
4017                 item2_size = btrfs_item_size(buf, item2);
4018
4019                 item1_data = malloc(item1_size);
4020                 if (!item1_data)
4021                         return -ENOMEM;
4022                 item2_data = malloc(item2_size);
4023                 if (!item2_data) {
4024                         free(item1_data);
4025                         return -ENOMEM;
4026                 }
4027
4028                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4029                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4030
4031                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4032                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4033                 free(item1_data);
4034                 free(item2_data);
4035
4036                 btrfs_set_item_offset(buf, item1, item2_offset);
4037                 btrfs_set_item_offset(buf, item2, item1_offset);
4038                 btrfs_set_item_size(buf, item1, item2_size);
4039                 btrfs_set_item_size(buf, item2, item1_size);
4040
4041                 path->slots[0] = slot;
4042                 btrfs_set_item_key_unsafe(root, path, &k2);
4043                 path->slots[0] = slot + 1;
4044                 btrfs_set_item_key_unsafe(root, path, &k1);
4045         }
4046         return 0;
4047 }
4048
4049 static int fix_key_order(struct btrfs_trans_handle *trans,
4050                          struct btrfs_root *root,
4051                          struct btrfs_path *path)
4052 {
4053         struct extent_buffer *buf;
4054         struct btrfs_key k1, k2;
4055         int i;
4056         int level = path->lowest_level;
4057         int ret = -EIO;
4058
4059         buf = path->nodes[level];
4060         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4061                 if (level) {
4062                         btrfs_node_key_to_cpu(buf, &k1, i);
4063                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4064                 } else {
4065                         btrfs_item_key_to_cpu(buf, &k1, i);
4066                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4067                 }
4068                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4069                         continue;
4070                 ret = swap_values(root, path, buf, i);
4071                 if (ret)
4072                         break;
4073                 btrfs_mark_buffer_dirty(buf);
4074                 i = 0;
4075         }
4076         return ret;
4077 }
4078
4079 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4080                              struct btrfs_root *root,
4081                              struct btrfs_path *path,
4082                              struct extent_buffer *buf, int slot)
4083 {
4084         struct btrfs_key key;
4085         int nritems = btrfs_header_nritems(buf);
4086
4087         btrfs_item_key_to_cpu(buf, &key, slot);
4088
4089         /* These are all the keys we can deal with missing. */
4090         if (key.type != BTRFS_DIR_INDEX_KEY &&
4091             key.type != BTRFS_EXTENT_ITEM_KEY &&
4092             key.type != BTRFS_METADATA_ITEM_KEY &&
4093             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4094             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4095                 return -1;
4096
4097         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4098                (unsigned long long)key.objectid, key.type,
4099                (unsigned long long)key.offset, slot, buf->start);
4100         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4101                               btrfs_item_nr_offset(slot + 1),
4102                               sizeof(struct btrfs_item) *
4103                               (nritems - slot - 1));
4104         btrfs_set_header_nritems(buf, nritems - 1);
4105         if (slot == 0) {
4106                 struct btrfs_disk_key disk_key;
4107
4108                 btrfs_item_key(buf, &disk_key, 0);
4109                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4110         }
4111         btrfs_mark_buffer_dirty(buf);
4112         return 0;
4113 }
4114
4115 static int fix_item_offset(struct btrfs_trans_handle *trans,
4116                            struct btrfs_root *root,
4117                            struct btrfs_path *path)
4118 {
4119         struct extent_buffer *buf;
4120         int i;
4121         int ret = 0;
4122
4123         /* We should only get this for leaves */
4124         BUG_ON(path->lowest_level);
4125         buf = path->nodes[0];
4126 again:
4127         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4128                 unsigned int shift = 0, offset;
4129
4130                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4131                     BTRFS_LEAF_DATA_SIZE(root)) {
4132                         if (btrfs_item_end_nr(buf, i) >
4133                             BTRFS_LEAF_DATA_SIZE(root)) {
4134                                 ret = delete_bogus_item(trans, root, path,
4135                                                         buf, i);
4136                                 if (!ret)
4137                                         goto again;
4138                                 fprintf(stderr, "item is off the end of the "
4139                                         "leaf, can't fix\n");
4140                                 ret = -EIO;
4141                                 break;
4142                         }
4143                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4144                                 btrfs_item_end_nr(buf, i);
4145                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4146                            btrfs_item_offset_nr(buf, i - 1)) {
4147                         if (btrfs_item_end_nr(buf, i) >
4148                             btrfs_item_offset_nr(buf, i - 1)) {
4149                                 ret = delete_bogus_item(trans, root, path,
4150                                                         buf, i);
4151                                 if (!ret)
4152                                         goto again;
4153                                 fprintf(stderr, "items overlap, can't fix\n");
4154                                 ret = -EIO;
4155                                 break;
4156                         }
4157                         shift = btrfs_item_offset_nr(buf, i - 1) -
4158                                 btrfs_item_end_nr(buf, i);
4159                 }
4160                 if (!shift)
4161                         continue;
4162
4163                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4164                        i, shift, (unsigned long long)buf->start);
4165                 offset = btrfs_item_offset_nr(buf, i);
4166                 memmove_extent_buffer(buf,
4167                                       btrfs_leaf_data(buf) + offset + shift,
4168                                       btrfs_leaf_data(buf) + offset,
4169                                       btrfs_item_size_nr(buf, i));
4170                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4171                                       offset + shift);
4172                 btrfs_mark_buffer_dirty(buf);
4173         }
4174
4175         /*
4176          * We may have moved things, in which case we want to exit so we don't
4177          * write those changes out.  Once we have proper abort functionality in
4178          * progs this can be changed to something nicer.
4179          */
4180         BUG_ON(ret);
4181         return ret;
4182 }
4183
4184 /*
4185  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4186  * then just return -EIO.
4187  */
4188 static int try_to_fix_bad_block(struct btrfs_root *root,
4189                                 struct extent_buffer *buf,
4190                                 enum btrfs_tree_block_status status)
4191 {
4192         struct btrfs_trans_handle *trans;
4193         struct ulist *roots;
4194         struct ulist_node *node;
4195         struct btrfs_root *search_root;
4196         struct btrfs_path *path;
4197         struct ulist_iterator iter;
4198         struct btrfs_key root_key, key;
4199         int ret;
4200
4201         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4202             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4203                 return -EIO;
4204
4205         path = btrfs_alloc_path();
4206         if (!path)
4207                 return -EIO;
4208
4209         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4210                                    0, &roots);
4211         if (ret) {
4212                 btrfs_free_path(path);
4213                 return -EIO;
4214         }
4215
4216         ULIST_ITER_INIT(&iter);
4217         while ((node = ulist_next(roots, &iter))) {
4218                 root_key.objectid = node->val;
4219                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4220                 root_key.offset = (u64)-1;
4221
4222                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4223                 if (IS_ERR(root)) {
4224                         ret = -EIO;
4225                         break;
4226                 }
4227
4228
4229                 trans = btrfs_start_transaction(search_root, 0);
4230                 if (IS_ERR(trans)) {
4231                         ret = PTR_ERR(trans);
4232                         break;
4233                 }
4234
4235                 path->lowest_level = btrfs_header_level(buf);
4236                 path->skip_check_block = 1;
4237                 if (path->lowest_level)
4238                         btrfs_node_key_to_cpu(buf, &key, 0);
4239                 else
4240                         btrfs_item_key_to_cpu(buf, &key, 0);
4241                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4242                 if (ret) {
4243                         ret = -EIO;
4244                         btrfs_commit_transaction(trans, search_root);
4245                         break;
4246                 }
4247                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4248                         ret = fix_key_order(trans, search_root, path);
4249                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4250                         ret = fix_item_offset(trans, search_root, path);
4251                 if (ret) {
4252                         btrfs_commit_transaction(trans, search_root);
4253                         break;
4254                 }
4255                 btrfs_release_path(path);
4256                 btrfs_commit_transaction(trans, search_root);
4257         }
4258         ulist_free(roots);
4259         btrfs_free_path(path);
4260         return ret;
4261 }
4262
4263 static int check_block(struct btrfs_root *root,
4264                        struct cache_tree *extent_cache,
4265                        struct extent_buffer *buf, u64 flags)
4266 {
4267         struct extent_record *rec;
4268         struct cache_extent *cache;
4269         struct btrfs_key key;
4270         enum btrfs_tree_block_status status;
4271         int ret = 0;
4272         int level;
4273
4274         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4275         if (!cache)
4276                 return 1;
4277         rec = container_of(cache, struct extent_record, cache);
4278         rec->generation = btrfs_header_generation(buf);
4279
4280         level = btrfs_header_level(buf);
4281         if (btrfs_header_nritems(buf) > 0) {
4282
4283                 if (level == 0)
4284                         btrfs_item_key_to_cpu(buf, &key, 0);
4285                 else
4286                         btrfs_node_key_to_cpu(buf, &key, 0);
4287
4288                 rec->info_objectid = key.objectid;
4289         }
4290         rec->info_level = level;
4291
4292         if (btrfs_is_leaf(buf))
4293                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4294         else
4295                 status = btrfs_check_node(root, &rec->parent_key, buf);
4296
4297         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4298                 if (repair)
4299                         status = try_to_fix_bad_block(root, buf, status);
4300                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4301                         ret = -EIO;
4302                         fprintf(stderr, "bad block %llu\n",
4303                                 (unsigned long long)buf->start);
4304                 } else {
4305                         /*
4306                          * Signal to callers we need to start the scan over
4307                          * again since we'll have cow'ed blocks.
4308                          */
4309                         ret = -EAGAIN;
4310                 }
4311         } else {
4312                 rec->content_checked = 1;
4313                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4314                         rec->owner_ref_checked = 1;
4315                 else {
4316                         ret = check_owner_ref(root, rec, buf);
4317                         if (!ret)
4318                                 rec->owner_ref_checked = 1;
4319                 }
4320         }
4321         if (!ret)
4322                 maybe_free_extent_rec(extent_cache, rec);
4323         return ret;
4324 }
4325
4326 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4327                                                 u64 parent, u64 root)
4328 {
4329         struct list_head *cur = rec->backrefs.next;
4330         struct extent_backref *node;
4331         struct tree_backref *back;
4332
4333         while(cur != &rec->backrefs) {
4334                 node = list_entry(cur, struct extent_backref, list);
4335                 cur = cur->next;
4336                 if (node->is_data)
4337                         continue;
4338                 back = (struct tree_backref *)node;
4339                 if (parent > 0) {
4340                         if (!node->full_backref)
4341                                 continue;
4342                         if (parent == back->parent)
4343                                 return back;
4344                 } else {
4345                         if (node->full_backref)
4346                                 continue;
4347                         if (back->root == root)
4348                                 return back;
4349                 }
4350         }
4351         return NULL;
4352 }
4353
4354 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4355                                                 u64 parent, u64 root)
4356 {
4357         struct tree_backref *ref = malloc(sizeof(*ref));
4358
4359         if (!ref)
4360                 return NULL;
4361         memset(&ref->node, 0, sizeof(ref->node));
4362         if (parent > 0) {
4363                 ref->parent = parent;
4364                 ref->node.full_backref = 1;
4365         } else {
4366                 ref->root = root;
4367                 ref->node.full_backref = 0;
4368         }
4369         list_add_tail(&ref->node.list, &rec->backrefs);
4370
4371         return ref;
4372 }
4373
4374 static struct data_backref *find_data_backref(struct extent_record *rec,
4375                                                 u64 parent, u64 root,
4376                                                 u64 owner, u64 offset,
4377                                                 int found_ref,
4378                                                 u64 disk_bytenr, u64 bytes)
4379 {
4380         struct list_head *cur = rec->backrefs.next;
4381         struct extent_backref *node;
4382         struct data_backref *back;
4383
4384         while(cur != &rec->backrefs) {
4385                 node = list_entry(cur, struct extent_backref, list);
4386                 cur = cur->next;
4387                 if (!node->is_data)
4388                         continue;
4389                 back = (struct data_backref *)node;
4390                 if (parent > 0) {
4391                         if (!node->full_backref)
4392                                 continue;
4393                         if (parent == back->parent)
4394                                 return back;
4395                 } else {
4396                         if (node->full_backref)
4397                                 continue;
4398                         if (back->root == root && back->owner == owner &&
4399                             back->offset == offset) {
4400                                 if (found_ref && node->found_ref &&
4401                                     (back->bytes != bytes ||
4402                                     back->disk_bytenr != disk_bytenr))
4403                                         continue;
4404                                 return back;
4405                         }
4406                 }
4407         }
4408         return NULL;
4409 }
4410
4411 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4412                                                 u64 parent, u64 root,
4413                                                 u64 owner, u64 offset,
4414                                                 u64 max_size)
4415 {
4416         struct data_backref *ref = malloc(sizeof(*ref));
4417
4418         if (!ref)
4419                 return NULL;
4420         memset(&ref->node, 0, sizeof(ref->node));
4421         ref->node.is_data = 1;
4422
4423         if (parent > 0) {
4424                 ref->parent = parent;
4425                 ref->owner = 0;
4426                 ref->offset = 0;
4427                 ref->node.full_backref = 1;
4428         } else {
4429                 ref->root = root;
4430                 ref->owner = owner;
4431                 ref->offset = offset;
4432                 ref->node.full_backref = 0;
4433         }
4434         ref->bytes = max_size;
4435         ref->found_ref = 0;
4436         ref->num_refs = 0;
4437         list_add_tail(&ref->node.list, &rec->backrefs);
4438         if (max_size > rec->max_size)
4439                 rec->max_size = max_size;
4440         return ref;
4441 }
4442
4443 /* Check if the type of extent matches with its chunk */
4444 static void check_extent_type(struct extent_record *rec)
4445 {
4446         struct btrfs_block_group_cache *bg_cache;
4447
4448         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4449         if (!bg_cache)
4450                 return;
4451
4452         /* data extent, check chunk directly*/
4453         if (!rec->metadata) {
4454                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4455                         rec->wrong_chunk_type = 1;
4456                 return;
4457         }
4458
4459         /* metadata extent, check the obvious case first */
4460         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4461                                  BTRFS_BLOCK_GROUP_METADATA))) {
4462                 rec->wrong_chunk_type = 1;
4463                 return;
4464         }
4465
4466         /*
4467          * Check SYSTEM extent, as it's also marked as metadata, we can only
4468          * make sure it's a SYSTEM extent by its backref
4469          */
4470         if (!list_empty(&rec->backrefs)) {
4471                 struct extent_backref *node;
4472                 struct tree_backref *tback;
4473                 u64 bg_type;
4474
4475                 node = list_entry(rec->backrefs.next, struct extent_backref,
4476                                   list);
4477                 if (node->is_data) {
4478                         /* tree block shouldn't have data backref */
4479                         rec->wrong_chunk_type = 1;
4480                         return;
4481                 }
4482                 tback = container_of(node, struct tree_backref, node);
4483
4484                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4485                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4486                 else
4487                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4488                 if (!(bg_cache->flags & bg_type))
4489                         rec->wrong_chunk_type = 1;
4490         }
4491 }
4492
4493 static int add_extent_rec(struct cache_tree *extent_cache,
4494                           struct btrfs_key *parent_key, u64 parent_gen,
4495                           u64 start, u64 nr, u64 extent_item_refs,
4496                           int is_root, int inc_ref, int set_checked,
4497                           int metadata, int extent_rec, u64 max_size)
4498 {
4499         struct extent_record *rec;
4500         struct cache_extent *cache;
4501         int ret = 0;
4502         int dup = 0;
4503
4504         cache = lookup_cache_extent(extent_cache, start, nr);
4505         if (cache) {
4506                 rec = container_of(cache, struct extent_record, cache);
4507                 if (inc_ref)
4508                         rec->refs++;
4509                 if (rec->nr == 1)
4510                         rec->nr = max(nr, max_size);
4511
4512                 /*
4513                  * We need to make sure to reset nr to whatever the extent
4514                  * record says was the real size, this way we can compare it to
4515                  * the backrefs.
4516                  */
4517                 if (extent_rec) {
4518                         if (start != rec->start || rec->found_rec) {
4519                                 struct extent_record *tmp;
4520
4521                                 dup = 1;
4522                                 if (list_empty(&rec->list))
4523                                         list_add_tail(&rec->list,
4524                                                       &duplicate_extents);
4525
4526                                 /*
4527                                  * We have to do this song and dance in case we
4528                                  * find an extent record that falls inside of
4529                                  * our current extent record but does not have
4530                                  * the same objectid.
4531                                  */
4532                                 tmp = malloc(sizeof(*tmp));
4533                                 if (!tmp)
4534                                         return -ENOMEM;
4535                                 tmp->start = start;
4536                                 tmp->max_size = max_size;
4537                                 tmp->nr = nr;
4538                                 tmp->found_rec = 1;
4539                                 tmp->metadata = metadata;
4540                                 tmp->extent_item_refs = extent_item_refs;
4541                                 INIT_LIST_HEAD(&tmp->list);
4542                                 list_add_tail(&tmp->list, &rec->dups);
4543                                 rec->num_duplicates++;
4544                         } else {
4545                                 rec->nr = nr;
4546                                 rec->found_rec = 1;
4547                         }
4548                 }
4549
4550                 if (extent_item_refs && !dup) {
4551                         if (rec->extent_item_refs) {
4552                                 fprintf(stderr, "block %llu rec "
4553                                         "extent_item_refs %llu, passed %llu\n",
4554                                         (unsigned long long)start,
4555                                         (unsigned long long)
4556                                                         rec->extent_item_refs,
4557                                         (unsigned long long)extent_item_refs);
4558                         }
4559                         rec->extent_item_refs = extent_item_refs;
4560                 }
4561                 if (is_root)
4562                         rec->is_root = 1;
4563                 if (set_checked) {
4564                         rec->content_checked = 1;
4565                         rec->owner_ref_checked = 1;
4566                 }
4567
4568                 if (parent_key)
4569                         btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4570                 if (parent_gen)
4571                         rec->parent_generation = parent_gen;
4572
4573                 if (rec->max_size < max_size)
4574                         rec->max_size = max_size;
4575
4576                 /*
4577                  * A metadata extent can't cross stripe_len boundary, otherwise
4578                  * kernel scrub won't be able to handle it.
4579                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4580                  * it.
4581                  */
4582                 if (metadata && check_crossing_stripes(rec->start,
4583                                                        rec->max_size))
4584                                 rec->crossing_stripes = 1;
4585                 check_extent_type(rec);
4586                 maybe_free_extent_rec(extent_cache, rec);
4587                 return ret;
4588         }
4589         rec = malloc(sizeof(*rec));
4590         if (!rec)
4591                 return -ENOMEM;
4592         rec->start = start;
4593         rec->max_size = max_size;
4594         rec->nr = max(nr, max_size);
4595         rec->found_rec = !!extent_rec;
4596         rec->content_checked = 0;
4597         rec->owner_ref_checked = 0;
4598         rec->num_duplicates = 0;
4599         rec->metadata = metadata;
4600         rec->flag_block_full_backref = -1;
4601         rec->bad_full_backref = 0;
4602         rec->crossing_stripes = 0;
4603         rec->wrong_chunk_type = 0;
4604         INIT_LIST_HEAD(&rec->backrefs);
4605         INIT_LIST_HEAD(&rec->dups);
4606         INIT_LIST_HEAD(&rec->list);
4607
4608         if (is_root)
4609                 rec->is_root = 1;
4610         else
4611                 rec->is_root = 0;
4612
4613         if (inc_ref)
4614                 rec->refs = 1;
4615         else
4616                 rec->refs = 0;
4617
4618         if (extent_item_refs)
4619                 rec->extent_item_refs = extent_item_refs;
4620         else
4621                 rec->extent_item_refs = 0;
4622
4623         if (parent_key)
4624                 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4625         else
4626                 memset(&rec->parent_key, 0, sizeof(*parent_key));
4627
4628         if (parent_gen)
4629                 rec->parent_generation = parent_gen;
4630         else
4631                 rec->parent_generation = 0;
4632
4633         rec->cache.start = start;
4634         rec->cache.size = nr;
4635         ret = insert_cache_extent(extent_cache, &rec->cache);
4636         BUG_ON(ret);
4637         bytes_used += nr;
4638         if (set_checked) {
4639                 rec->content_checked = 1;
4640                 rec->owner_ref_checked = 1;
4641         }
4642
4643         if (metadata)
4644                 if (check_crossing_stripes(rec->start, rec->max_size))
4645                         rec->crossing_stripes = 1;
4646         check_extent_type(rec);
4647         return ret;
4648 }
4649
4650 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4651                             u64 parent, u64 root, int found_ref)
4652 {
4653         struct extent_record *rec;
4654         struct tree_backref *back;
4655         struct cache_extent *cache;
4656
4657         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4658         if (!cache) {
4659                 add_extent_rec(extent_cache, NULL, 0, bytenr,
4660                                1, 0, 0, 0, 0, 1, 0, 0);
4661                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4662                 if (!cache)
4663                         abort();
4664         }
4665
4666         rec = container_of(cache, struct extent_record, cache);
4667         if (rec->start != bytenr) {
4668                 abort();
4669         }
4670
4671         back = find_tree_backref(rec, parent, root);
4672         if (!back) {
4673                 back = alloc_tree_backref(rec, parent, root);
4674                 BUG_ON(!back);
4675         }
4676
4677         if (found_ref) {
4678                 if (back->node.found_ref) {
4679                         fprintf(stderr, "Extent back ref already exists "
4680                                 "for %llu parent %llu root %llu \n",
4681                                 (unsigned long long)bytenr,
4682                                 (unsigned long long)parent,
4683                                 (unsigned long long)root);
4684                 }
4685                 back->node.found_ref = 1;
4686         } else {
4687                 if (back->node.found_extent_tree) {
4688                         fprintf(stderr, "Extent back ref already exists "
4689                                 "for %llu parent %llu root %llu \n",
4690                                 (unsigned long long)bytenr,
4691                                 (unsigned long long)parent,
4692                                 (unsigned long long)root);
4693                 }
4694                 back->node.found_extent_tree = 1;
4695         }
4696         check_extent_type(rec);
4697         maybe_free_extent_rec(extent_cache, rec);
4698         return 0;
4699 }
4700
4701 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4702                             u64 parent, u64 root, u64 owner, u64 offset,
4703                             u32 num_refs, int found_ref, u64 max_size)
4704 {
4705         struct extent_record *rec;
4706         struct data_backref *back;
4707         struct cache_extent *cache;
4708
4709         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4710         if (!cache) {
4711                 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
4712                                0, 0, max_size);
4713                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4714                 if (!cache)
4715                         abort();
4716         }
4717
4718         rec = container_of(cache, struct extent_record, cache);
4719         if (rec->max_size < max_size)
4720                 rec->max_size = max_size;
4721
4722         /*
4723          * If found_ref is set then max_size is the real size and must match the
4724          * existing refs.  So if we have already found a ref then we need to
4725          * make sure that this ref matches the existing one, otherwise we need
4726          * to add a new backref so we can notice that the backrefs don't match
4727          * and we need to figure out who is telling the truth.  This is to
4728          * account for that awful fsync bug I introduced where we'd end up with
4729          * a btrfs_file_extent_item that would have its length include multiple
4730          * prealloc extents or point inside of a prealloc extent.
4731          */
4732         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4733                                  bytenr, max_size);
4734         if (!back) {
4735                 back = alloc_data_backref(rec, parent, root, owner, offset,
4736                                           max_size);
4737                 BUG_ON(!back);
4738         }
4739
4740         if (found_ref) {
4741                 BUG_ON(num_refs != 1);
4742                 if (back->node.found_ref)
4743                         BUG_ON(back->bytes != max_size);
4744                 back->node.found_ref = 1;
4745                 back->found_ref += 1;
4746                 back->bytes = max_size;
4747                 back->disk_bytenr = bytenr;
4748                 rec->refs += 1;
4749                 rec->content_checked = 1;
4750                 rec->owner_ref_checked = 1;
4751         } else {
4752                 if (back->node.found_extent_tree) {
4753                         fprintf(stderr, "Extent back ref already exists "
4754                                 "for %llu parent %llu root %llu "
4755                                 "owner %llu offset %llu num_refs %lu\n",
4756                                 (unsigned long long)bytenr,
4757                                 (unsigned long long)parent,
4758                                 (unsigned long long)root,
4759                                 (unsigned long long)owner,
4760                                 (unsigned long long)offset,
4761                                 (unsigned long)num_refs);
4762                 }
4763                 back->num_refs = num_refs;
4764                 back->node.found_extent_tree = 1;
4765         }
4766         maybe_free_extent_rec(extent_cache, rec);
4767         return 0;
4768 }
4769
4770 static int add_pending(struct cache_tree *pending,
4771                        struct cache_tree *seen, u64 bytenr, u32 size)
4772 {
4773         int ret;
4774         ret = add_cache_extent(seen, bytenr, size);
4775         if (ret)
4776                 return ret;
4777         add_cache_extent(pending, bytenr, size);
4778         return 0;
4779 }
4780
4781 static int pick_next_pending(struct cache_tree *pending,
4782                         struct cache_tree *reada,
4783                         struct cache_tree *nodes,
4784                         u64 last, struct block_info *bits, int bits_nr,
4785                         int *reada_bits)
4786 {
4787         unsigned long node_start = last;
4788         struct cache_extent *cache;
4789         int ret;
4790
4791         cache = search_cache_extent(reada, 0);
4792         if (cache) {
4793                 bits[0].start = cache->start;
4794                 bits[0].size = cache->size;
4795                 *reada_bits = 1;
4796                 return 1;
4797         }
4798         *reada_bits = 0;
4799         if (node_start > 32768)
4800                 node_start -= 32768;
4801
4802         cache = search_cache_extent(nodes, node_start);
4803         if (!cache)
4804                 cache = search_cache_extent(nodes, 0);
4805
4806         if (!cache) {
4807                  cache = search_cache_extent(pending, 0);
4808                  if (!cache)
4809                          return 0;
4810                  ret = 0;
4811                  do {
4812                          bits[ret].start = cache->start;
4813                          bits[ret].size = cache->size;
4814                          cache = next_cache_extent(cache);
4815                          ret++;
4816                  } while (cache && ret < bits_nr);
4817                  return ret;
4818         }
4819
4820         ret = 0;
4821         do {
4822                 bits[ret].start = cache->start;
4823                 bits[ret].size = cache->size;
4824                 cache = next_cache_extent(cache);
4825                 ret++;
4826         } while (cache && ret < bits_nr);
4827
4828         if (bits_nr - ret > 8) {
4829                 u64 lookup = bits[0].start + bits[0].size;
4830                 struct cache_extent *next;
4831                 next = search_cache_extent(pending, lookup);
4832                 while(next) {
4833                         if (next->start - lookup > 32768)
4834                                 break;
4835                         bits[ret].start = next->start;
4836                         bits[ret].size = next->size;
4837                         lookup = next->start + next->size;
4838                         ret++;
4839                         if (ret == bits_nr)
4840                                 break;
4841                         next = next_cache_extent(next);
4842                         if (!next)
4843                                 break;
4844                 }
4845         }
4846         return ret;
4847 }
4848
4849 static void free_chunk_record(struct cache_extent *cache)
4850 {
4851         struct chunk_record *rec;
4852
4853         rec = container_of(cache, struct chunk_record, cache);
4854         list_del_init(&rec->list);
4855         list_del_init(&rec->dextents);
4856         free(rec);
4857 }
4858
4859 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4860 {
4861         cache_tree_free_extents(chunk_cache, free_chunk_record);
4862 }
4863
4864 static void free_device_record(struct rb_node *node)
4865 {
4866         struct device_record *rec;
4867
4868         rec = container_of(node, struct device_record, node);
4869         free(rec);
4870 }
4871
4872 FREE_RB_BASED_TREE(device_cache, free_device_record);
4873
4874 int insert_block_group_record(struct block_group_tree *tree,
4875                               struct block_group_record *bg_rec)
4876 {
4877         int ret;
4878
4879         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4880         if (ret)
4881                 return ret;
4882
4883         list_add_tail(&bg_rec->list, &tree->block_groups);
4884         return 0;
4885 }
4886
4887 static void free_block_group_record(struct cache_extent *cache)
4888 {
4889         struct block_group_record *rec;
4890
4891         rec = container_of(cache, struct block_group_record, cache);
4892         list_del_init(&rec->list);
4893         free(rec);
4894 }
4895
4896 void free_block_group_tree(struct block_group_tree *tree)
4897 {
4898         cache_tree_free_extents(&tree->tree, free_block_group_record);
4899 }
4900
4901 int insert_device_extent_record(struct device_extent_tree *tree,
4902                                 struct device_extent_record *de_rec)
4903 {
4904         int ret;
4905
4906         /*
4907          * Device extent is a bit different from the other extents, because
4908          * the extents which belong to the different devices may have the
4909          * same start and size, so we need use the special extent cache
4910          * search/insert functions.
4911          */
4912         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4913         if (ret)
4914                 return ret;
4915
4916         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4917         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4918         return 0;
4919 }
4920
4921 static void free_device_extent_record(struct cache_extent *cache)
4922 {
4923         struct device_extent_record *rec;
4924
4925         rec = container_of(cache, struct device_extent_record, cache);
4926         if (!list_empty(&rec->chunk_list))
4927                 list_del_init(&rec->chunk_list);
4928         if (!list_empty(&rec->device_list))
4929                 list_del_init(&rec->device_list);
4930         free(rec);
4931 }
4932
4933 void free_device_extent_tree(struct device_extent_tree *tree)
4934 {
4935         cache_tree_free_extents(&tree->tree, free_device_extent_record);
4936 }
4937
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent reference stored at @slot of @leaf into a backref in
 * @extent_cache.
 *
 * A referencing objectid below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref; anything else is recorded as a data backref carrying the
 * item's reference count.  In both cases key.objectid is the extent start
 * and key.offset is passed as the parent.  Always returns 0.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID) {
                add_data_backref(extent_cache, key.objectid, key.offset, 0,
                                 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
                return 0;
        }

        add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
        return 0;
}
#endif
4956
4957 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4958                                             struct btrfs_key *key,
4959                                             int slot)
4960 {
4961         struct btrfs_chunk *ptr;
4962         struct chunk_record *rec;
4963         int num_stripes, i;
4964
4965         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4966         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4967
4968         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4969         if (!rec) {
4970                 fprintf(stderr, "memory allocation failed\n");
4971                 exit(-1);
4972         }
4973
4974         INIT_LIST_HEAD(&rec->list);
4975         INIT_LIST_HEAD(&rec->dextents);
4976         rec->bg_rec = NULL;
4977
4978         rec->cache.start = key->offset;
4979         rec->cache.size = btrfs_chunk_length(leaf, ptr);
4980
4981         rec->generation = btrfs_header_generation(leaf);
4982
4983         rec->objectid = key->objectid;
4984         rec->type = key->type;
4985         rec->offset = key->offset;
4986
4987         rec->length = rec->cache.size;
4988         rec->owner = btrfs_chunk_owner(leaf, ptr);
4989         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4990         rec->type_flags = btrfs_chunk_type(leaf, ptr);
4991         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4992         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4993         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4994         rec->num_stripes = num_stripes;
4995         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4996
4997         for (i = 0; i < rec->num_stripes; ++i) {
4998                 rec->stripes[i].devid =
4999                         btrfs_stripe_devid_nr(leaf, ptr, i);
5000                 rec->stripes[i].offset =
5001                         btrfs_stripe_offset_nr(leaf, ptr, i);
5002                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5003                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5004                                 BTRFS_UUID_SIZE);
5005         }
5006
5007         return rec;
5008 }
5009
5010 static int process_chunk_item(struct cache_tree *chunk_cache,
5011                               struct btrfs_key *key, struct extent_buffer *eb,
5012                               int slot)
5013 {
5014         struct chunk_record *rec;
5015         int ret = 0;
5016
5017         rec = btrfs_new_chunk_record(eb, key, slot);
5018         ret = insert_cache_extent(chunk_cache, &rec->cache);
5019         if (ret) {
5020                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5021                         rec->offset, rec->length);
5022                 free(rec);
5023         }
5024
5025         return ret;
5026 }
5027
5028 static int process_device_item(struct rb_root *dev_cache,
5029                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5030 {
5031         struct btrfs_dev_item *ptr;
5032         struct device_record *rec;
5033         int ret = 0;
5034
5035         ptr = btrfs_item_ptr(eb,
5036                 slot, struct btrfs_dev_item);
5037
5038         rec = malloc(sizeof(*rec));
5039         if (!rec) {
5040                 fprintf(stderr, "memory allocation failed\n");
5041                 return -ENOMEM;
5042         }
5043
5044         rec->devid = key->offset;
5045         rec->generation = btrfs_header_generation(eb);
5046
5047         rec->objectid = key->objectid;
5048         rec->type = key->type;
5049         rec->offset = key->offset;
5050
5051         rec->devid = btrfs_device_id(eb, ptr);
5052         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5053         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5054
5055         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5056         if (ret) {
5057                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5058                 free(rec);
5059         }
5060
5061         return ret;
5062 }
5063
5064 struct block_group_record *
5065 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5066                              int slot)
5067 {
5068         struct btrfs_block_group_item *ptr;
5069         struct block_group_record *rec;
5070
5071         rec = calloc(1, sizeof(*rec));
5072         if (!rec) {
5073                 fprintf(stderr, "memory allocation failed\n");
5074                 exit(-1);
5075         }
5076
5077         rec->cache.start = key->objectid;
5078         rec->cache.size = key->offset;
5079
5080         rec->generation = btrfs_header_generation(leaf);
5081
5082         rec->objectid = key->objectid;
5083         rec->type = key->type;
5084         rec->offset = key->offset;
5085
5086         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5087         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5088
5089         INIT_LIST_HEAD(&rec->list);
5090
5091         return rec;
5092 }
5093
5094 static int process_block_group_item(struct block_group_tree *block_group_cache,
5095                                     struct btrfs_key *key,
5096                                     struct extent_buffer *eb, int slot)
5097 {
5098         struct block_group_record *rec;
5099         int ret = 0;
5100
5101         rec = btrfs_new_block_group_record(eb, key, slot);
5102         ret = insert_block_group_record(block_group_cache, rec);
5103         if (ret) {
5104                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5105                         rec->objectid, rec->offset);
5106                 free(rec);
5107         }
5108
5109         return ret;
5110 }
5111
5112 struct device_extent_record *
5113 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5114                                struct btrfs_key *key, int slot)
5115 {
5116         struct device_extent_record *rec;
5117         struct btrfs_dev_extent *ptr;
5118
5119         rec = calloc(1, sizeof(*rec));
5120         if (!rec) {
5121                 fprintf(stderr, "memory allocation failed\n");
5122                 exit(-1);
5123         }
5124
5125         rec->cache.objectid = key->objectid;
5126         rec->cache.start = key->offset;
5127
5128         rec->generation = btrfs_header_generation(leaf);
5129
5130         rec->objectid = key->objectid;
5131         rec->type = key->type;
5132         rec->offset = key->offset;
5133
5134         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5135         rec->chunk_objecteid =
5136                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5137         rec->chunk_offset =
5138                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5139         rec->length = btrfs_dev_extent_length(leaf, ptr);
5140         rec->cache.size = rec->length;
5141
5142         INIT_LIST_HEAD(&rec->chunk_list);
5143         INIT_LIST_HEAD(&rec->device_list);
5144
5145         return rec;
5146 }
5147
5148 static int
5149 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5150                            struct btrfs_key *key, struct extent_buffer *eb,
5151                            int slot)
5152 {
5153         struct device_extent_record *rec;
5154         int ret;
5155
5156         rec = btrfs_new_device_extent_record(eb, key, slot);
5157         ret = insert_device_extent_record(dev_extent_cache, rec);
5158         if (ret) {
5159                 fprintf(stderr,
5160                         "Device extent[%llu, %llu, %llu] existed.\n",
5161                         rec->objectid, rec->offset, rec->length);
5162                 free(rec);
5163         }
5164
5165         return ret;
5166 }
5167
5168 static int process_extent_item(struct btrfs_root *root,
5169                                struct cache_tree *extent_cache,
5170                                struct extent_buffer *eb, int slot)
5171 {
5172         struct btrfs_extent_item *ei;
5173         struct btrfs_extent_inline_ref *iref;
5174         struct btrfs_extent_data_ref *dref;
5175         struct btrfs_shared_data_ref *sref;
5176         struct btrfs_key key;
5177         unsigned long end;
5178         unsigned long ptr;
5179         int type;
5180         u32 item_size = btrfs_item_size_nr(eb, slot);
5181         u64 refs = 0;
5182         u64 offset;
5183         u64 num_bytes;
5184         int metadata = 0;
5185
5186         btrfs_item_key_to_cpu(eb, &key, slot);
5187
5188         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5189                 metadata = 1;
5190                 num_bytes = root->leafsize;
5191         } else {
5192                 num_bytes = key.offset;
5193         }
5194
5195         if (item_size < sizeof(*ei)) {
5196 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5197                 struct btrfs_extent_item_v0 *ei0;
5198                 BUG_ON(item_size != sizeof(*ei0));
5199                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5200                 refs = btrfs_extent_refs_v0(eb, ei0);
5201 #else
5202                 BUG();
5203 #endif
5204                 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5205                                       num_bytes, refs, 0, 0, 0, metadata, 1,
5206                                       num_bytes);
5207         }
5208
5209         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5210         refs = btrfs_extent_refs(eb, ei);
5211         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5212                 metadata = 1;
5213         else
5214                 metadata = 0;
5215
5216         add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5217                        refs, 0, 0, 0, metadata, 1, num_bytes);
5218
5219         ptr = (unsigned long)(ei + 1);
5220         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5221             key.type == BTRFS_EXTENT_ITEM_KEY)
5222                 ptr += sizeof(struct btrfs_tree_block_info);
5223
5224         end = (unsigned long)ei + item_size;
5225         while (ptr < end) {
5226                 iref = (struct btrfs_extent_inline_ref *)ptr;
5227                 type = btrfs_extent_inline_ref_type(eb, iref);
5228                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5229                 switch (type) {
5230                 case BTRFS_TREE_BLOCK_REF_KEY:
5231                         add_tree_backref(extent_cache, key.objectid,
5232                                          0, offset, 0);
5233                         break;
5234                 case BTRFS_SHARED_BLOCK_REF_KEY:
5235                         add_tree_backref(extent_cache, key.objectid,
5236                                          offset, 0, 0);
5237                         break;
5238                 case BTRFS_EXTENT_DATA_REF_KEY:
5239                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5240                         add_data_backref(extent_cache, key.objectid, 0,
5241                                         btrfs_extent_data_ref_root(eb, dref),
5242                                         btrfs_extent_data_ref_objectid(eb,
5243                                                                        dref),
5244                                         btrfs_extent_data_ref_offset(eb, dref),
5245                                         btrfs_extent_data_ref_count(eb, dref),
5246                                         0, num_bytes);
5247                         break;
5248                 case BTRFS_SHARED_DATA_REF_KEY:
5249                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5250                         add_data_backref(extent_cache, key.objectid, offset,
5251                                         0, 0, 0,
5252                                         btrfs_shared_data_ref_count(eb, sref),
5253                                         0, num_bytes);
5254                         break;
5255                 default:
5256                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5257                                 key.objectid, key.type, num_bytes);
5258                         goto out;
5259                 }
5260                 ptr += btrfs_extent_inline_ref_size(type);
5261         }
5262         WARN_ON(ptr > end);
5263 out:
5264         return 0;
5265 }
5266
5267 static int check_cache_range(struct btrfs_root *root,
5268                              struct btrfs_block_group_cache *cache,
5269                              u64 offset, u64 bytes)
5270 {
5271         struct btrfs_free_space *entry;
5272         u64 *logical;
5273         u64 bytenr;
5274         int stripe_len;
5275         int i, nr, ret;
5276
5277         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5278                 bytenr = btrfs_sb_offset(i);
5279                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5280                                        cache->key.objectid, bytenr, 0,
5281                                        &logical, &nr, &stripe_len);
5282                 if (ret)
5283                         return ret;
5284
5285                 while (nr--) {
5286                         if (logical[nr] + stripe_len <= offset)
5287                                 continue;
5288                         if (offset + bytes <= logical[nr])
5289                                 continue;
5290                         if (logical[nr] == offset) {
5291                                 if (stripe_len >= bytes) {
5292                                         kfree(logical);
5293                                         return 0;
5294                                 }
5295                                 bytes -= stripe_len;
5296                                 offset += stripe_len;
5297                         } else if (logical[nr] < offset) {
5298                                 if (logical[nr] + stripe_len >=
5299                                     offset + bytes) {
5300                                         kfree(logical);
5301                                         return 0;
5302                                 }
5303                                 bytes = (offset + bytes) -
5304                                         (logical[nr] + stripe_len);
5305                                 offset = logical[nr] + stripe_len;
5306                         } else {
5307                                 /*
5308                                  * Could be tricky, the super may land in the
5309                                  * middle of the area we're checking.  First
5310                                  * check the easiest case, it's at the end.
5311                                  */
5312                                 if (logical[nr] + stripe_len >=
5313                                     bytes + offset) {
5314                                         bytes = logical[nr] - offset;
5315                                         continue;
5316                                 }
5317
5318                                 /* Check the left side */
5319                                 ret = check_cache_range(root, cache,
5320                                                         offset,
5321                                                         logical[nr] - offset);
5322                                 if (ret) {
5323                                         kfree(logical);
5324                                         return ret;
5325                                 }
5326
5327                                 /* Now we continue with the right side */
5328                                 bytes = (offset + bytes) -
5329                                         (logical[nr] + stripe_len);
5330                                 offset = logical[nr] + stripe_len;
5331                         }
5332                 }
5333
5334                 kfree(logical);
5335         }
5336
5337         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5338         if (!entry) {
5339                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5340                         offset, offset+bytes);
5341                 return -EINVAL;
5342         }
5343
5344         if (entry->offset != offset) {
5345                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5346                         entry->offset);
5347                 return -EINVAL;
5348         }
5349
5350         if (entry->bytes != bytes) {
5351                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5352                         bytes, entry->bytes, offset);
5353                 return -EINVAL;
5354         }
5355
5356         unlink_free_space(cache->free_space_ctl, entry);
5357         free(entry);
5358         return 0;
5359 }
5360
5361 static int verify_space_cache(struct btrfs_root *root,
5362                               struct btrfs_block_group_cache *cache)
5363 {
5364         struct btrfs_path *path;
5365         struct extent_buffer *leaf;
5366         struct btrfs_key key;
5367         u64 last;
5368         int ret = 0;
5369
5370         path = btrfs_alloc_path();
5371         if (!path)
5372                 return -ENOMEM;
5373
5374         root = root->fs_info->extent_root;
5375
5376         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5377
5378         key.objectid = last;
5379         key.offset = 0;
5380         key.type = BTRFS_EXTENT_ITEM_KEY;
5381
5382         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5383         if (ret < 0)
5384                 goto out;
5385         ret = 0;
5386         while (1) {
5387                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5388                         ret = btrfs_next_leaf(root, path);
5389                         if (ret < 0)
5390                                 goto out;
5391                         if (ret > 0) {
5392                                 ret = 0;
5393                                 break;
5394                         }
5395                 }
5396                 leaf = path->nodes[0];
5397                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5398                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5399                         break;
5400                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5401                     key.type != BTRFS_METADATA_ITEM_KEY) {
5402                         path->slots[0]++;
5403                         continue;
5404                 }
5405
5406                 if (last == key.objectid) {
5407                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5408                                 last = key.objectid + key.offset;
5409                         else
5410                                 last = key.objectid + root->leafsize;
5411                         path->slots[0]++;
5412                         continue;
5413                 }
5414
5415                 ret = check_cache_range(root, cache, last,
5416                                         key.objectid - last);
5417                 if (ret)
5418                         break;
5419                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5420                         last = key.objectid + key.offset;
5421                 else
5422                         last = key.objectid + root->leafsize;
5423                 path->slots[0]++;
5424         }
5425
5426         if (last < cache->key.objectid + cache->key.offset)
5427                 ret = check_cache_range(root, cache, last,
5428                                         cache->key.objectid +
5429                                         cache->key.offset - last);
5430
5431 out:
5432         btrfs_free_path(path);
5433
5434         if (!ret &&
5435             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5436                 fprintf(stderr, "There are still entries left in the space "
5437                         "cache\n");
5438                 ret = -EINVAL;
5439         }
5440
5441         return ret;
5442 }
5443
5444 static int check_space_cache(struct btrfs_root *root)
5445 {
5446         struct btrfs_block_group_cache *cache;
5447         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5448         int ret;
5449         int error = 0;
5450
5451         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5452             btrfs_super_generation(root->fs_info->super_copy) !=
5453             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5454                 printf("cache and super generation don't match, space cache "
5455                        "will be invalidated\n");
5456                 return 0;
5457         }
5458
5459         if (ctx.progress_enabled) {
5460                 ctx.tp = TASK_FREE_SPACE;
5461                 task_start(ctx.info);
5462         }
5463
5464         while (1) {
5465                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5466                 if (!cache)
5467                         break;
5468
5469                 start = cache->key.objectid + cache->key.offset;
5470                 if (!cache->free_space_ctl) {
5471                         if (btrfs_init_free_space_ctl(cache,
5472                                                       root->sectorsize)) {
5473                                 ret = -ENOMEM;
5474                                 break;
5475                         }
5476                 } else {
5477                         btrfs_remove_free_space_cache(cache);
5478                 }
5479
5480                 if (btrfs_fs_compat_ro(root->fs_info,
5481                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5482                         ret = exclude_super_stripes(root, cache);
5483                         if (ret) {
5484                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5485                                         strerror(-ret));
5486                                 error++;
5487                                 continue;
5488                         }
5489                         ret = load_free_space_tree(root->fs_info, cache);
5490                         free_excluded_extents(root, cache);
5491                         if (ret < 0) {
5492                                 fprintf(stderr, "could not load free space tree: %s\n",
5493                                         strerror(-ret));
5494                                 error++;
5495                                 continue;
5496                         }
5497                         error += ret;
5498                 } else {
5499                         ret = load_free_space_cache(root->fs_info, cache);
5500                         if (!ret)
5501                                 continue;
5502                 }
5503
5504                 ret = verify_space_cache(root, cache);
5505                 if (ret) {
5506                         fprintf(stderr, "cache appears valid but isnt %Lu\n",
5507                                 cache->key.objectid);
5508                         error++;
5509                 }
5510         }
5511
5512         task_stop(ctx.info);
5513
5514         return error ? -EINVAL : 0;
5515 }
5516
5517 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5518                         u64 num_bytes, unsigned long leaf_offset,
5519                         struct extent_buffer *eb) {
5520
5521         u64 offset = 0;
5522         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5523         char *data;
5524         unsigned long csum_offset;
5525         u32 csum;
5526         u32 csum_expected;
5527         u64 read_len;
5528         u64 data_checked = 0;
5529         u64 tmp;
5530         int ret = 0;
5531         int mirror;
5532         int num_copies;
5533
5534         if (num_bytes % root->sectorsize)
5535                 return -EINVAL;
5536
5537         data = malloc(num_bytes);
5538         if (!data)
5539                 return -ENOMEM;
5540
5541         while (offset < num_bytes) {
5542                 mirror = 0;
5543 again:
5544                 read_len = num_bytes - offset;
5545                 /* read as much space once a time */
5546                 ret = read_extent_data(root, data + offset,
5547                                 bytenr + offset, &read_len, mirror);
5548                 if (ret)
5549                         goto out;
5550                 data_checked = 0;
5551                 /* verify every 4k data's checksum */
5552                 while (data_checked < read_len) {
5553                         csum = ~(u32)0;
5554                         tmp = offset + data_checked;
5555
5556                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5557                                                csum, root->sectorsize);
5558                         btrfs_csum_final(csum, (char *)&csum);
5559
5560                         csum_offset = leaf_offset +
5561                                  tmp / root->sectorsize * csum_size;
5562                         read_extent_buffer(eb, (char *)&csum_expected,
5563                                            csum_offset, csum_size);
5564                         /* try another mirror */
5565                         if (csum != csum_expected) {
5566                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5567                                                 mirror, bytenr + tmp,
5568                                                 csum, csum_expected);
5569                                 num_copies = btrfs_num_copies(
5570                                                 &root->fs_info->mapping_tree,
5571                                                 bytenr, num_bytes);
5572                                 if (mirror < num_copies - 1) {
5573                                         mirror += 1;
5574                                         goto again;
5575                                 }
5576                         }
5577                         data_checked += root->sectorsize;
5578                 }
5579                 offset += read_len;
5580         }
5581 out:
5582         free(data);
5583         return ret;
5584 }
5585
5586 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5587                                u64 num_bytes)
5588 {
5589         struct btrfs_path *path;
5590         struct extent_buffer *leaf;
5591         struct btrfs_key key;
5592         int ret;
5593
5594         path = btrfs_alloc_path();
5595         if (!path) {
5596                 fprintf(stderr, "Error allocing path\n");
5597                 return -ENOMEM;
5598         }
5599
5600         key.objectid = bytenr;
5601         key.type = BTRFS_EXTENT_ITEM_KEY;
5602         key.offset = (u64)-1;
5603
5604 again:
5605         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5606                                 0, 0);
5607         if (ret < 0) {
5608                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5609                 btrfs_free_path(path);
5610                 return ret;
5611         } else if (ret) {
5612                 if (path->slots[0] > 0) {
5613                         path->slots[0]--;
5614                 } else {
5615                         ret = btrfs_prev_leaf(root, path);
5616                         if (ret < 0) {
5617                                 goto out;
5618                         } else if (ret > 0) {
5619                                 ret = 0;
5620                                 goto out;
5621                         }
5622                 }
5623         }
5624
5625         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5626
5627         /*
5628          * Block group items come before extent items if they have the same
5629          * bytenr, so walk back one more just in case.  Dear future traveler,
5630          * first congrats on mastering time travel.  Now if it's not too much
5631          * trouble could you go back to 2006 and tell Chris to make the
5632          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5633          * EXTENT_ITEM_KEY please?
5634          */
5635         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5636                 if (path->slots[0] > 0) {
5637                         path->slots[0]--;
5638                 } else {
5639                         ret = btrfs_prev_leaf(root, path);
5640                         if (ret < 0) {
5641                                 goto out;
5642                         } else if (ret > 0) {
5643                                 ret = 0;
5644                                 goto out;
5645                         }
5646                 }
5647                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5648         }
5649
5650         while (num_bytes) {
5651                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5652                         ret = btrfs_next_leaf(root, path);
5653                         if (ret < 0) {
5654                                 fprintf(stderr, "Error going to next leaf "
5655                                         "%d\n", ret);
5656                                 btrfs_free_path(path);
5657                                 return ret;
5658                         } else if (ret) {
5659                                 break;
5660                         }
5661                 }
5662                 leaf = path->nodes[0];
5663                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5664                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5665                         path->slots[0]++;
5666                         continue;
5667                 }
5668                 if (key.objectid + key.offset < bytenr) {
5669                         path->slots[0]++;
5670                         continue;
5671                 }
5672                 if (key.objectid > bytenr + num_bytes)
5673                         break;
5674
5675                 if (key.objectid == bytenr) {
5676                         if (key.offset >= num_bytes) {
5677                                 num_bytes = 0;
5678                                 break;
5679                         }
5680                         num_bytes -= key.offset;
5681                         bytenr += key.offset;
5682                 } else if (key.objectid < bytenr) {
5683                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5684                                 num_bytes = 0;
5685                                 break;
5686                         }
5687                         num_bytes = (bytenr + num_bytes) -
5688                                 (key.objectid + key.offset);
5689                         bytenr = key.objectid + key.offset;
5690                 } else {
5691                         if (key.objectid + key.offset < bytenr + num_bytes) {
5692                                 u64 new_start = key.objectid + key.offset;
5693                                 u64 new_bytes = bytenr + num_bytes - new_start;
5694
5695                                 /*
5696                                  * Weird case, the extent is in the middle of
5697                                  * our range, we'll have to search one side
5698                                  * and then the other.  Not sure if this happens
5699                                  * in real life, but no harm in coding it up
5700                                  * anyway just in case.
5701                                  */
5702                                 btrfs_release_path(path);
5703                                 ret = check_extent_exists(root, new_start,
5704                                                           new_bytes);
5705                                 if (ret) {
5706                                         fprintf(stderr, "Right section didn't "
5707                                                 "have a record\n");
5708                                         break;
5709                                 }
5710                                 num_bytes = key.objectid - bytenr;
5711                                 goto again;
5712                         }
5713                         num_bytes = key.objectid - bytenr;
5714                 }
5715                 path->slots[0]++;
5716         }
5717         ret = 0;
5718
5719 out:
5720         if (num_bytes && !ret) {
5721                 fprintf(stderr, "There are no extents for csum range "
5722                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5723                 ret = 1;
5724         }
5725
5726         btrfs_free_path(path);
5727         return ret;
5728 }
5729
5730 static int check_csums(struct btrfs_root *root)
5731 {
5732         struct btrfs_path *path;
5733         struct extent_buffer *leaf;
5734         struct btrfs_key key;
5735         u64 offset = 0, num_bytes = 0;
5736         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5737         int errors = 0;
5738         int ret;
5739         u64 data_len;
5740         unsigned long leaf_offset;
5741
5742         root = root->fs_info->csum_root;
5743         if (!extent_buffer_uptodate(root->node)) {
5744                 fprintf(stderr, "No valid csum tree found\n");
5745                 return -ENOENT;
5746         }
5747
5748         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5749         key.type = BTRFS_EXTENT_CSUM_KEY;
5750         key.offset = 0;
5751
5752         path = btrfs_alloc_path();
5753         if (!path)
5754                 return -ENOMEM;
5755
5756         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5757         if (ret < 0) {
5758                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5759                 btrfs_free_path(path);
5760                 return ret;
5761         }
5762
5763         if (ret > 0 && path->slots[0])
5764                 path->slots[0]--;
5765         ret = 0;
5766
5767         while (1) {
5768                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5769                         ret = btrfs_next_leaf(root, path);
5770                         if (ret < 0) {
5771                                 fprintf(stderr, "Error going to next leaf "
5772                                         "%d\n", ret);
5773                                 break;
5774                         }
5775                         if (ret)
5776                                 break;
5777                 }
5778                 leaf = path->nodes[0];
5779
5780                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5781                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5782                         path->slots[0]++;
5783                         continue;
5784                 }
5785
5786                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5787                               csum_size) * root->sectorsize;
5788                 if (!check_data_csum)
5789                         goto skip_csum_check;
5790                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5791                 ret = check_extent_csums(root, key.offset, data_len,
5792                                          leaf_offset, leaf);
5793                 if (ret)
5794                         break;
5795 skip_csum_check:
5796                 if (!num_bytes) {
5797                         offset = key.offset;
5798                 } else if (key.offset != offset + num_bytes) {
5799                         ret = check_extent_exists(root, offset, num_bytes);
5800                         if (ret) {
5801                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5802                                         "there is no extent record\n",
5803                                         offset, offset+num_bytes);
5804                                 errors++;
5805                         }
5806                         offset = key.offset;
5807                         num_bytes = 0;
5808                 }
5809                 num_bytes += data_len;
5810                 path->slots[0]++;
5811         }
5812
5813         btrfs_free_path(path);
5814         return errors;
5815 }
5816
5817 static int is_dropped_key(struct btrfs_key *key,
5818                           struct btrfs_key *drop_key) {
5819         if (key->objectid < drop_key->objectid)
5820                 return 1;
5821         else if (key->objectid == drop_key->objectid) {
5822                 if (key->type < drop_key->type)
5823                         return 1;
5824                 else if (key->type == drop_key->type) {
5825                         if (key->offset < drop_key->offset)
5826                                 return 1;
5827                 }
5828         }
5829         return 0;
5830 }
5831
5832 /*
5833  * Here are the rules for FULL_BACKREF.
5834  *
5835  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5836  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5837  *      FULL_BACKREF set.
5838  * 3) We cow'ed the block walking down a reloc tree.  This is impossible to tell
5839  *    if it happened after the relocation occurred since we'll have dropped the
5840  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5841  *    have no real way to know for sure.
5842  *
5843  * We process the blocks one root at a time, and we start from the lowest root
5844  * objectid and go to the highest.  So we can just lookup the owner backref for
5845  * the record and if we don't find it then we know it doesn't exist and we have
5846  * a FULL BACKREF.
5847  *
5848  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5849  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5850  * be set or not and then we can check later once we've gathered all the refs.
5851  */
5852 static int calc_extent_flag(struct btrfs_root *root,
5853                            struct cache_tree *extent_cache,
5854                            struct extent_buffer *buf,
5855                            struct root_item_record *ri,
5856                            u64 *flags)
5857 {
5858         struct extent_record *rec;
5859         struct cache_extent *cache;
5860         struct tree_backref *tback;
5861         u64 owner = 0;
5862
5863         cache = lookup_cache_extent(extent_cache, buf->start, 1);
5864         /* we have added this extent before */
5865         BUG_ON(!cache);
5866         rec = container_of(cache, struct extent_record, cache);
5867
5868         /*
5869          * Except file/reloc tree, we can not have
5870          * FULL BACKREF MODE
5871          */
5872         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5873                 goto normal;
5874         /*
5875          * root node
5876          */
5877         if (buf->start == ri->bytenr)
5878                 goto normal;
5879
5880         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5881                 goto full_backref;
5882
5883         owner = btrfs_header_owner(buf);
5884         if (owner == ri->objectid)
5885                 goto normal;
5886
5887         tback = find_tree_backref(rec, 0, owner);
5888         if (!tback)
5889                 goto full_backref;
5890 normal:
5891         *flags = 0;
5892         if (rec->flag_block_full_backref != -1 &&
5893             rec->flag_block_full_backref != 0)
5894                 rec->bad_full_backref = 1;
5895         return 0;
5896 full_backref:
5897         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5898         if (rec->flag_block_full_backref != -1 &&
5899             rec->flag_block_full_backref != 1)
5900                 rec->bad_full_backref = 1;
5901         return 0;
5902 }
5903
5904 static int run_next_block(struct btrfs_root *root,
5905                           struct block_info *bits,
5906                           int bits_nr,
5907                           u64 *last,
5908                           struct cache_tree *pending,
5909                           struct cache_tree *seen,
5910                           struct cache_tree *reada,
5911                           struct cache_tree *nodes,
5912                           struct cache_tree *extent_cache,
5913                           struct cache_tree *chunk_cache,
5914                           struct rb_root *dev_cache,
5915                           struct block_group_tree *block_group_cache,
5916                           struct device_extent_tree *dev_extent_cache,
5917                           struct root_item_record *ri)
5918 {
5919         struct extent_buffer *buf;
5920         struct extent_record *rec = NULL;
5921         u64 bytenr;
5922         u32 size;
5923         u64 parent;
5924         u64 owner;
5925         u64 flags;
5926         u64 ptr;
5927         u64 gen = 0;
5928         int ret = 0;
5929         int i;
5930         int nritems;
5931         struct btrfs_key key;
5932         struct cache_extent *cache;
5933         int reada_bits;
5934
5935         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5936                                     bits_nr, &reada_bits);
5937         if (nritems == 0)
5938                 return 1;
5939
5940         if (!reada_bits) {
5941                 for(i = 0; i < nritems; i++) {
5942                         ret = add_cache_extent(reada, bits[i].start,
5943                                                bits[i].size);
5944                         if (ret == -EEXIST)
5945                                 continue;
5946
5947                         /* fixme, get the parent transid */
5948                         readahead_tree_block(root, bits[i].start,
5949                                              bits[i].size, 0);
5950                 }
5951         }
5952         *last = bits[0].start;
5953         bytenr = bits[0].start;
5954         size = bits[0].size;
5955
5956         cache = lookup_cache_extent(pending, bytenr, size);
5957         if (cache) {
5958                 remove_cache_extent(pending, cache);
5959                 free(cache);
5960         }
5961         cache = lookup_cache_extent(reada, bytenr, size);
5962         if (cache) {
5963                 remove_cache_extent(reada, cache);
5964                 free(cache);
5965         }
5966         cache = lookup_cache_extent(nodes, bytenr, size);
5967         if (cache) {
5968                 remove_cache_extent(nodes, cache);
5969                 free(cache);
5970         }
5971         cache = lookup_cache_extent(extent_cache, bytenr, size);
5972         if (cache) {
5973                 rec = container_of(cache, struct extent_record, cache);
5974                 gen = rec->parent_generation;
5975         }
5976
5977         /* fixme, get the real parent transid */
5978         buf = read_tree_block(root, bytenr, size, gen);
5979         if (!extent_buffer_uptodate(buf)) {
5980                 record_bad_block_io(root->fs_info,
5981                                     extent_cache, bytenr, size);
5982                 goto out;
5983         }
5984
5985         nritems = btrfs_header_nritems(buf);
5986
5987         flags = 0;
5988         if (!init_extent_tree) {
5989                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5990                                        btrfs_header_level(buf), 1, NULL,
5991                                        &flags);
5992                 if (ret < 0) {
5993                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5994                         if (ret < 0) {
5995                                 fprintf(stderr, "Couldn't calc extent flags\n");
5996                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5997                         }
5998                 }
5999         } else {
6000                 flags = 0;
6001                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6002                 if (ret < 0) {
6003                         fprintf(stderr, "Couldn't calc extent flags\n");
6004                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6005                 }
6006         }
6007
6008         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6009                 if (ri != NULL &&
6010                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6011                     ri->objectid == btrfs_header_owner(buf)) {
6012                         /*
6013                          * Ok we got to this block from it's original owner and
6014                          * we have FULL_BACKREF set.  Relocation can leave
6015                          * converted blocks over so this is altogether possible,
6016                          * however it's not possible if the generation > the
6017                          * last snapshot, so check for this case.
6018                          */
6019                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6020                             btrfs_header_generation(buf) > ri->last_snapshot) {
6021                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6022                                 rec->bad_full_backref = 1;
6023                         }
6024                 }
6025         } else {
6026                 if (ri != NULL &&
6027                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6028                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6029                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6030                         rec->bad_full_backref = 1;
6031                 }
6032         }
6033
6034         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6035                 rec->flag_block_full_backref = 1;
6036                 parent = bytenr;
6037                 owner = 0;
6038         } else {
6039                 rec->flag_block_full_backref = 0;
6040                 parent = 0;
6041                 owner = btrfs_header_owner(buf);
6042         }
6043
6044         ret = check_block(root, extent_cache, buf, flags);
6045         if (ret)
6046                 goto out;
6047
6048         if (btrfs_is_leaf(buf)) {
6049                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6050                 for (i = 0; i < nritems; i++) {
6051                         struct btrfs_file_extent_item *fi;
6052                         btrfs_item_key_to_cpu(buf, &key, i);
6053                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6054                                 process_extent_item(root, extent_cache, buf,
6055                                                     i);
6056                                 continue;
6057                         }
6058                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6059                                 process_extent_item(root, extent_cache, buf,
6060                                                     i);
6061                                 continue;
6062                         }
6063                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6064                                 total_csum_bytes +=
6065                                         btrfs_item_size_nr(buf, i);
6066                                 continue;
6067                         }
6068                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6069                                 process_chunk_item(chunk_cache, &key, buf, i);
6070                                 continue;
6071                         }
6072                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6073                                 process_device_item(dev_cache, &key, buf, i);
6074                                 continue;
6075                         }
6076                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6077                                 process_block_group_item(block_group_cache,
6078                                         &key, buf, i);
6079                                 continue;
6080                         }
6081                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6082                                 process_device_extent_item(dev_extent_cache,
6083                                         &key, buf, i);
6084                                 continue;
6085
6086                         }
6087                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6088 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6089                                 process_extent_ref_v0(extent_cache, buf, i);
6090 #else
6091                                 BUG();
6092 #endif
6093                                 continue;
6094                         }
6095
6096                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6097                                 add_tree_backref(extent_cache, key.objectid, 0,
6098                                                  key.offset, 0);
6099                                 continue;
6100                         }
6101                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6102                                 add_tree_backref(extent_cache, key.objectid,
6103                                                  key.offset, 0, 0);
6104                                 continue;
6105                         }
6106                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6107                                 struct btrfs_extent_data_ref *ref;
6108                                 ref = btrfs_item_ptr(buf, i,
6109                                                 struct btrfs_extent_data_ref);
6110                                 add_data_backref(extent_cache,
6111                                         key.objectid, 0,
6112                                         btrfs_extent_data_ref_root(buf, ref),
6113                                         btrfs_extent_data_ref_objectid(buf,
6114                                                                        ref),
6115                                         btrfs_extent_data_ref_offset(buf, ref),
6116                                         btrfs_extent_data_ref_count(buf, ref),
6117                                         0, root->sectorsize);
6118                                 continue;
6119                         }
6120                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6121                                 struct btrfs_shared_data_ref *ref;
6122                                 ref = btrfs_item_ptr(buf, i,
6123                                                 struct btrfs_shared_data_ref);
6124                                 add_data_backref(extent_cache,
6125                                         key.objectid, key.offset, 0, 0, 0,
6126                                         btrfs_shared_data_ref_count(buf, ref),
6127                                         0, root->sectorsize);
6128                                 continue;
6129                         }
6130                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6131                                 struct bad_item *bad;
6132
6133                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6134                                         continue;
6135                                 if (!owner)
6136                                         continue;
6137                                 bad = malloc(sizeof(struct bad_item));
6138                                 if (!bad)
6139                                         continue;
6140                                 INIT_LIST_HEAD(&bad->list);
6141                                 memcpy(&bad->key, &key,
6142                                        sizeof(struct btrfs_key));
6143                                 bad->root_id = owner;
6144                                 list_add_tail(&bad->list, &delete_items);
6145                                 continue;
6146                         }
6147                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6148                                 continue;
6149                         fi = btrfs_item_ptr(buf, i,
6150                                             struct btrfs_file_extent_item);
6151                         if (btrfs_file_extent_type(buf, fi) ==
6152                             BTRFS_FILE_EXTENT_INLINE)
6153                                 continue;
6154                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6155                                 continue;
6156
6157                         data_bytes_allocated +=
6158                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6159                         if (data_bytes_allocated < root->sectorsize) {
6160                                 abort();
6161                         }
6162                         data_bytes_referenced +=
6163                                 btrfs_file_extent_num_bytes(buf, fi);
6164                         add_data_backref(extent_cache,
6165                                 btrfs_file_extent_disk_bytenr(buf, fi),
6166                                 parent, owner, key.objectid, key.offset -
6167                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6168                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6169                 }
6170         } else {
6171                 int level;
6172                 struct btrfs_key first_key;
6173
6174                 first_key.objectid = 0;
6175
6176                 if (nritems > 0)
6177                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6178                 level = btrfs_header_level(buf);
6179                 for (i = 0; i < nritems; i++) {
6180                         ptr = btrfs_node_blockptr(buf, i);
6181                         size = btrfs_level_size(root, level - 1);
6182                         btrfs_node_key_to_cpu(buf, &key, i);
6183                         if (ri != NULL) {
6184                                 if ((level == ri->drop_level)
6185                                     && is_dropped_key(&key, &ri->drop_key)) {
6186                                         continue;
6187                                 }
6188                         }
6189                         ret = add_extent_rec(extent_cache, &key,
6190                                              btrfs_node_ptr_generation(buf, i),
6191                                              ptr, size, 0, 0, 1, 0, 1, 0,
6192                                              size);
6193                         BUG_ON(ret);
6194
6195                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6196
6197                         if (level > 1) {
6198                                 add_pending(nodes, seen, ptr, size);
6199                         } else {
6200                                 add_pending(pending, seen, ptr, size);
6201                         }
6202                 }
6203                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6204                                       nritems) * sizeof(struct btrfs_key_ptr);
6205         }
6206         total_btree_bytes += buf->len;
6207         if (fs_root_objectid(btrfs_header_owner(buf)))
6208                 total_fs_tree_bytes += buf->len;
6209         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6210                 total_extent_tree_bytes += buf->len;
6211         if (!found_old_backref &&
6212             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6213             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6214             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6215                 found_old_backref = 1;
6216 out:
6217         free_extent_buffer(buf);
6218         return ret;
6219 }
6220
6221 static int add_root_to_pending(struct extent_buffer *buf,
6222                                struct cache_tree *extent_cache,
6223                                struct cache_tree *pending,
6224                                struct cache_tree *seen,
6225                                struct cache_tree *nodes,
6226                                u64 objectid)
6227 {
6228         if (btrfs_header_level(buf) > 0)
6229                 add_pending(nodes, seen, buf->start, buf->len);
6230         else
6231                 add_pending(pending, seen, buf->start, buf->len);
6232         add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6233                        0, 1, 1, 0, 1, 0, buf->len);
6234
6235         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6236             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6237                 add_tree_backref(extent_cache, buf->start, buf->start,
6238                                  0, 1);
6239         else
6240                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6241         return 0;
6242 }
6243
6244 /* as we fix the tree, we might be deleting blocks that
6245  * we're tracking for repair.  This hook makes sure we
6246  * remove any backrefs for blocks as we are fixing them.
6247  */
6248 static int free_extent_hook(struct btrfs_trans_handle *trans,
6249                             struct btrfs_root *root,
6250                             u64 bytenr, u64 num_bytes, u64 parent,
6251                             u64 root_objectid, u64 owner, u64 offset,
6252                             int refs_to_drop)
6253 {
6254         struct extent_record *rec;
6255         struct cache_extent *cache;
6256         int is_data;
6257         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6258
6259         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6260         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6261         if (!cache)
6262                 return 0;
6263
6264         rec = container_of(cache, struct extent_record, cache);
6265         if (is_data) {
6266                 struct data_backref *back;
6267                 back = find_data_backref(rec, parent, root_objectid, owner,
6268                                          offset, 1, bytenr, num_bytes);
6269                 if (!back)
6270                         goto out;
6271                 if (back->node.found_ref) {
6272                         back->found_ref -= refs_to_drop;
6273                         if (rec->refs)
6274                                 rec->refs -= refs_to_drop;
6275                 }
6276                 if (back->node.found_extent_tree) {
6277                         back->num_refs -= refs_to_drop;
6278                         if (rec->extent_item_refs)
6279                                 rec->extent_item_refs -= refs_to_drop;
6280                 }
6281                 if (back->found_ref == 0)
6282                         back->node.found_ref = 0;
6283                 if (back->num_refs == 0)
6284                         back->node.found_extent_tree = 0;
6285
6286                 if (!back->node.found_extent_tree && back->node.found_ref) {
6287                         list_del(&back->node.list);
6288                         free(back);
6289                 }
6290         } else {
6291                 struct tree_backref *back;
6292                 back = find_tree_backref(rec, parent, root_objectid);
6293                 if (!back)
6294                         goto out;
6295                 if (back->node.found_ref) {
6296                         if (rec->refs)
6297                                 rec->refs--;
6298                         back->node.found_ref = 0;
6299                 }
6300                 if (back->node.found_extent_tree) {
6301                         if (rec->extent_item_refs)
6302                                 rec->extent_item_refs--;
6303                         back->node.found_extent_tree = 0;
6304                 }
6305                 if (!back->node.found_extent_tree && back->node.found_ref) {
6306                         list_del(&back->node.list);
6307                         free(back);
6308                 }
6309         }
6310         maybe_free_extent_rec(extent_cache, rec);
6311 out:
6312         return 0;
6313 }
6314
6315 static int delete_extent_records(struct btrfs_trans_handle *trans,
6316                                  struct btrfs_root *root,
6317                                  struct btrfs_path *path,
6318                                  u64 bytenr, u64 new_len)
6319 {
6320         struct btrfs_key key;
6321         struct btrfs_key found_key;
6322         struct extent_buffer *leaf;
6323         int ret;
6324         int slot;
6325
6326
6327         key.objectid = bytenr;
6328         key.type = (u8)-1;
6329         key.offset = (u64)-1;
6330
6331         while(1) {
6332                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6333                                         &key, path, 0, 1);
6334                 if (ret < 0)
6335                         break;
6336
6337                 if (ret > 0) {
6338                         ret = 0;
6339                         if (path->slots[0] == 0)
6340                                 break;
6341                         path->slots[0]--;
6342                 }
6343                 ret = 0;
6344
6345                 leaf = path->nodes[0];
6346                 slot = path->slots[0];
6347
6348                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6349                 if (found_key.objectid != bytenr)
6350                         break;
6351
6352                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6353                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6354                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6355                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6356                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6357                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6358                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6359                         btrfs_release_path(path);
6360                         if (found_key.type == 0) {
6361                                 if (found_key.offset == 0)
6362                                         break;
6363                                 key.offset = found_key.offset - 1;
6364                                 key.type = found_key.type;
6365                         }
6366                         key.type = found_key.type - 1;
6367                         key.offset = (u64)-1;
6368                         continue;
6369                 }
6370
6371                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6372                         found_key.objectid, found_key.type, found_key.offset);
6373
6374                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6375                 if (ret)
6376                         break;
6377                 btrfs_release_path(path);
6378
6379                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6380                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6381                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6382                                 found_key.offset : root->leafsize;
6383
6384                         ret = btrfs_update_block_group(trans, root, bytenr,
6385                                                        bytes, 0, 0);
6386                         if (ret)
6387                                 break;
6388                 }
6389         }
6390
6391         btrfs_release_path(path);
6392         return ret;
6393 }
6394
6395 /*
6396  * for a single backref, this will allocate a new extent
6397  * and add the backref to it.
6398  */
6399 static int record_extent(struct btrfs_trans_handle *trans,
6400                          struct btrfs_fs_info *info,
6401                          struct btrfs_path *path,
6402                          struct extent_record *rec,
6403                          struct extent_backref *back,
6404                          int allocated, u64 flags)
6405 {
6406         int ret;
6407         struct btrfs_root *extent_root = info->extent_root;
6408         struct extent_buffer *leaf;
6409         struct btrfs_key ins_key;
6410         struct btrfs_extent_item *ei;
6411         struct tree_backref *tback;
6412         struct data_backref *dback;
6413         struct btrfs_tree_block_info *bi;
6414
6415         if (!back->is_data)
6416                 rec->max_size = max_t(u64, rec->max_size,
6417                                     info->extent_root->leafsize);
6418
6419         if (!allocated) {
6420                 u32 item_size = sizeof(*ei);
6421
6422                 if (!back->is_data)
6423                         item_size += sizeof(*bi);
6424
6425                 ins_key.objectid = rec->start;
6426                 ins_key.offset = rec->max_size;
6427                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6428
6429                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6430                                         &ins_key, item_size);
6431                 if (ret)
6432                         goto fail;
6433
6434                 leaf = path->nodes[0];
6435                 ei = btrfs_item_ptr(leaf, path->slots[0],
6436                                     struct btrfs_extent_item);
6437
6438                 btrfs_set_extent_refs(leaf, ei, 0);
6439                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6440
6441                 if (back->is_data) {
6442                         btrfs_set_extent_flags(leaf, ei,
6443                                                BTRFS_EXTENT_FLAG_DATA);
6444                 } else {
6445                         struct btrfs_disk_key copy_key;;
6446
6447                         tback = (struct tree_backref *)back;
6448                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6449                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6450                                              sizeof(*bi));
6451
6452                         btrfs_set_disk_key_objectid(&copy_key,
6453                                                     rec->info_objectid);
6454                         btrfs_set_disk_key_type(&copy_key, 0);
6455                         btrfs_set_disk_key_offset(&copy_key, 0);
6456
6457                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6458                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6459
6460                         btrfs_set_extent_flags(leaf, ei,
6461                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6462                 }
6463
6464                 btrfs_mark_buffer_dirty(leaf);
6465                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6466                                                rec->max_size, 1, 0);
6467                 if (ret)
6468                         goto fail;
6469                 btrfs_release_path(path);
6470         }
6471
6472         if (back->is_data) {
6473                 u64 parent;
6474                 int i;
6475
6476                 dback = (struct data_backref *)back;
6477                 if (back->full_backref)
6478                         parent = dback->parent;
6479                 else
6480                         parent = 0;
6481
6482                 for (i = 0; i < dback->found_ref; i++) {
6483                         /* if parent != 0, we're doing a full backref
6484                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6485                          * just makes the backref allocator create a data
6486                          * backref
6487                          */
6488                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6489                                                    rec->start, rec->max_size,
6490                                                    parent,
6491                                                    dback->root,
6492                                                    parent ?
6493                                                    BTRFS_FIRST_FREE_OBJECTID :
6494                                                    dback->owner,
6495                                                    dback->offset);
6496                         if (ret)
6497                                 break;
6498                 }
6499                 fprintf(stderr, "adding new data backref"
6500                                 " on %llu %s %llu owner %llu"
6501                                 " offset %llu found %d\n",
6502                                 (unsigned long long)rec->start,
6503                                 back->full_backref ?
6504                                 "parent" : "root",
6505                                 back->full_backref ?
6506                                 (unsigned long long)parent :
6507                                 (unsigned long long)dback->root,
6508                                 (unsigned long long)dback->owner,
6509                                 (unsigned long long)dback->offset,
6510                                 dback->found_ref);
6511         } else {
6512                 u64 parent;
6513
6514                 tback = (struct tree_backref *)back;
6515                 if (back->full_backref)
6516                         parent = tback->parent;
6517                 else
6518                         parent = 0;
6519
6520                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6521                                            rec->start, rec->max_size,
6522                                            parent, tback->root, 0, 0);
6523                 fprintf(stderr, "adding new tree backref on "
6524                         "start %llu len %llu parent %llu root %llu\n",
6525                         rec->start, rec->max_size, parent, tback->root);
6526         }
6527 fail:
6528         btrfs_release_path(path);
6529         return ret;
6530 }
6531
6532 struct extent_entry {
6533         u64 bytenr;
6534         u64 bytes;
6535         int count;
6536         int broken;
6537         struct list_head list;
6538 };
6539
6540 static struct extent_entry *find_entry(struct list_head *entries,
6541                                        u64 bytenr, u64 bytes)
6542 {
6543         struct extent_entry *entry = NULL;
6544
6545         list_for_each_entry(entry, entries, list) {
6546                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6547                         return entry;
6548         }
6549
6550         return NULL;
6551 }
6552
6553 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6554 {
6555         struct extent_entry *entry, *best = NULL, *prev = NULL;
6556
6557         list_for_each_entry(entry, entries, list) {
6558                 if (!prev) {
6559                         prev = entry;
6560                         continue;
6561                 }
6562
6563                 /*
6564                  * If there are as many broken entries as entries then we know
6565                  * not to trust this particular entry.
6566                  */
6567                 if (entry->broken == entry->count)
6568                         continue;
6569
6570                 /*
6571                  * If our current entry == best then we can't be sure our best
6572                  * is really the best, so we need to keep searching.
6573                  */
6574                 if (best && best->count == entry->count) {
6575                         prev = entry;
6576                         best = NULL;
6577                         continue;
6578                 }
6579
6580                 /* Prev == entry, not good enough, have to keep searching */
6581                 if (!prev->broken && prev->count == entry->count)
6582                         continue;
6583
6584                 if (!best)
6585                         best = (prev->count > entry->count) ? prev : entry;
6586                 else if (best->count < entry->count)
6587                         best = entry;
6588                 prev = entry;
6589         }
6590
6591         return best;
6592 }
6593
6594 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6595                       struct data_backref *dback, struct extent_entry *entry)
6596 {
6597         struct btrfs_trans_handle *trans;
6598         struct btrfs_root *root;
6599         struct btrfs_file_extent_item *fi;
6600         struct extent_buffer *leaf;
6601         struct btrfs_key key;
6602         u64 bytenr, bytes;
6603         int ret, err;
6604
6605         key.objectid = dback->root;
6606         key.type = BTRFS_ROOT_ITEM_KEY;
6607         key.offset = (u64)-1;
6608         root = btrfs_read_fs_root(info, &key);
6609         if (IS_ERR(root)) {
6610                 fprintf(stderr, "Couldn't find root for our ref\n");
6611                 return -EINVAL;
6612         }
6613
6614         /*
6615          * The backref points to the original offset of the extent if it was
6616          * split, so we need to search down to the offset we have and then walk
6617          * forward until we find the backref we're looking for.
6618          */
6619         key.objectid = dback->owner;
6620         key.type = BTRFS_EXTENT_DATA_KEY;
6621         key.offset = dback->offset;
6622         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6623         if (ret < 0) {
6624                 fprintf(stderr, "Error looking up ref %d\n", ret);
6625                 return ret;
6626         }
6627
6628         while (1) {
6629                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6630                         ret = btrfs_next_leaf(root, path);
6631                         if (ret) {
6632                                 fprintf(stderr, "Couldn't find our ref, next\n");
6633                                 return -EINVAL;
6634                         }
6635                 }
6636                 leaf = path->nodes[0];
6637                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6638                 if (key.objectid != dback->owner ||
6639                     key.type != BTRFS_EXTENT_DATA_KEY) {
6640                         fprintf(stderr, "Couldn't find our ref, search\n");
6641                         return -EINVAL;
6642                 }
6643                 fi = btrfs_item_ptr(leaf, path->slots[0],
6644                                     struct btrfs_file_extent_item);
6645                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6646                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6647
6648                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6649                         break;
6650                 path->slots[0]++;
6651         }
6652
6653         btrfs_release_path(path);
6654
6655         trans = btrfs_start_transaction(root, 1);
6656         if (IS_ERR(trans))
6657                 return PTR_ERR(trans);
6658
6659         /*
6660          * Ok we have the key of the file extent we want to fix, now we can cow
6661          * down to the thing and fix it.
6662          */
6663         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6664         if (ret < 0) {
6665                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6666                         key.objectid, key.type, key.offset, ret);
6667                 goto out;
6668         }
6669         if (ret > 0) {
6670                 fprintf(stderr, "Well that's odd, we just found this key "
6671                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6672                         key.offset);
6673                 ret = -EINVAL;
6674                 goto out;
6675         }
6676         leaf = path->nodes[0];
6677         fi = btrfs_item_ptr(leaf, path->slots[0],
6678                             struct btrfs_file_extent_item);
6679
6680         if (btrfs_file_extent_compression(leaf, fi) &&
6681             dback->disk_bytenr != entry->bytenr) {
6682                 fprintf(stderr, "Ref doesn't match the record start and is "
6683                         "compressed, please take a btrfs-image of this file "
6684                         "system and send it to a btrfs developer so they can "
6685                         "complete this functionality for bytenr %Lu\n",
6686                         dback->disk_bytenr);
6687                 ret = -EINVAL;
6688                 goto out;
6689         }
6690
6691         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6692                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6693         } else if (dback->disk_bytenr > entry->bytenr) {
6694                 u64 off_diff, offset;
6695
6696                 off_diff = dback->disk_bytenr - entry->bytenr;
6697                 offset = btrfs_file_extent_offset(leaf, fi);
6698                 if (dback->disk_bytenr + offset +
6699                     btrfs_file_extent_num_bytes(leaf, fi) >
6700                     entry->bytenr + entry->bytes) {
6701                         fprintf(stderr, "Ref is past the entry end, please "
6702                                 "take a btrfs-image of this file system and "
6703                                 "send it to a btrfs developer, ref %Lu\n",
6704                                 dback->disk_bytenr);
6705                         ret = -EINVAL;
6706                         goto out;
6707                 }
6708                 offset += off_diff;
6709                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6710                 btrfs_set_file_extent_offset(leaf, fi, offset);
6711         } else if (dback->disk_bytenr < entry->bytenr) {
6712                 u64 offset;
6713
6714                 offset = btrfs_file_extent_offset(leaf, fi);
6715                 if (dback->disk_bytenr + offset < entry->bytenr) {
6716                         fprintf(stderr, "Ref is before the entry start, please"
6717                                 " take a btrfs-image of this file system and "
6718                                 "send it to a btrfs developer, ref %Lu\n",
6719                                 dback->disk_bytenr);
6720                         ret = -EINVAL;
6721                         goto out;
6722                 }
6723
6724                 offset += dback->disk_bytenr;
6725                 offset -= entry->bytenr;
6726                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6727                 btrfs_set_file_extent_offset(leaf, fi, offset);
6728         }
6729
6730         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6731
6732         /*
6733          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6734          * only do this if we aren't using compression, otherwise it's a
6735          * trickier case.
6736          */
6737         if (!btrfs_file_extent_compression(leaf, fi))
6738                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6739         else
6740                 printf("ram bytes may be wrong?\n");
6741         btrfs_mark_buffer_dirty(leaf);
6742 out:
6743         err = btrfs_commit_transaction(trans, root);
6744         btrfs_release_path(path);
6745         return ret ? ret : err;
6746 }
6747
6748 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6749                            struct extent_record *rec)
6750 {
6751         struct extent_backref *back;
6752         struct data_backref *dback;
6753         struct extent_entry *entry, *best = NULL;
6754         LIST_HEAD(entries);
6755         int nr_entries = 0;
6756         int broken_entries = 0;
6757         int ret = 0;
6758         short mismatch = 0;
6759
6760         /*
6761          * Metadata is easy and the backrefs should always agree on bytenr and
6762          * size, if not we've got bigger issues.
6763          */
6764         if (rec->metadata)
6765                 return 0;
6766
6767         list_for_each_entry(back, &rec->backrefs, list) {
6768                 if (back->full_backref || !back->is_data)
6769                         continue;
6770
6771                 dback = (struct data_backref *)back;
6772
6773                 /*
6774                  * We only pay attention to backrefs that we found a real
6775                  * backref for.
6776                  */
6777                 if (dback->found_ref == 0)
6778                         continue;
6779
6780                 /*
6781                  * For now we only catch when the bytes don't match, not the
6782                  * bytenr.  We can easily do this at the same time, but I want
6783                  * to have a fs image to test on before we just add repair
6784                  * functionality willy-nilly so we know we won't screw up the
6785                  * repair.
6786                  */
6787
6788                 entry = find_entry(&entries, dback->disk_bytenr,
6789                                    dback->bytes);
6790                 if (!entry) {
6791                         entry = malloc(sizeof(struct extent_entry));
6792                         if (!entry) {
6793                                 ret = -ENOMEM;
6794                                 goto out;
6795                         }
6796                         memset(entry, 0, sizeof(*entry));
6797                         entry->bytenr = dback->disk_bytenr;
6798                         entry->bytes = dback->bytes;
6799                         list_add_tail(&entry->list, &entries);
6800                         nr_entries++;
6801                 }
6802
6803                 /*
6804                  * If we only have on entry we may think the entries agree when
6805                  * in reality they don't so we have to do some extra checking.
6806                  */
6807                 if (dback->disk_bytenr != rec->start ||
6808                     dback->bytes != rec->nr || back->broken)
6809                         mismatch = 1;
6810
6811                 if (back->broken) {
6812                         entry->broken++;
6813                         broken_entries++;
6814                 }
6815
6816                 entry->count++;
6817         }
6818
6819         /* Yay all the backrefs agree, carry on good sir */
6820         if (nr_entries <= 1 && !mismatch)
6821                 goto out;
6822
6823         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6824                 "%Lu\n", rec->start);
6825
6826         /*
6827          * First we want to see if the backrefs can agree amongst themselves who
6828          * is right, so figure out which one of the entries has the highest
6829          * count.
6830          */
6831         best = find_most_right_entry(&entries);
6832
6833         /*
6834          * Ok so we may have an even split between what the backrefs think, so
6835          * this is where we use the extent ref to see what it thinks.
6836          */
6837         if (!best) {
6838                 entry = find_entry(&entries, rec->start, rec->nr);
6839                 if (!entry && (!broken_entries || !rec->found_rec)) {
6840                         fprintf(stderr, "Backrefs don't agree with each other "
6841                                 "and extent record doesn't agree with anybody,"
6842                                 " so we can't fix bytenr %Lu bytes %Lu\n",
6843                                 rec->start, rec->nr);
6844                         ret = -EINVAL;
6845                         goto out;
6846                 } else if (!entry) {
6847                         /*
6848                          * Ok our backrefs were broken, we'll assume this is the
6849                          * correct value and add an entry for this range.
6850                          */
6851                         entry = malloc(sizeof(struct extent_entry));
6852                         if (!entry) {
6853                                 ret = -ENOMEM;
6854                                 goto out;
6855                         }
6856                         memset(entry, 0, sizeof(*entry));
6857                         entry->bytenr = rec->start;
6858                         entry->bytes = rec->nr;
6859                         list_add_tail(&entry->list, &entries);
6860                         nr_entries++;
6861                 }
6862                 entry->count++;
6863                 best = find_most_right_entry(&entries);
6864                 if (!best) {
6865                         fprintf(stderr, "Backrefs and extent record evenly "
6866                                 "split on who is right, this is going to "
6867                                 "require user input to fix bytenr %Lu bytes "
6868                                 "%Lu\n", rec->start, rec->nr);
6869                         ret = -EINVAL;
6870                         goto out;
6871                 }
6872         }
6873
6874         /*
6875          * I don't think this can happen currently as we'll abort() if we catch
6876          * this case higher up, but in case somebody removes that we still can't
6877          * deal with it properly here yet, so just bail out of that's the case.
6878          */
6879         if (best->bytenr != rec->start) {
6880                 fprintf(stderr, "Extent start and backref starts don't match, "
6881                         "please use btrfs-image on this file system and send "
6882                         "it to a btrfs developer so they can make fsck fix "
6883                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
6884                         rec->start, rec->nr);
6885                 ret = -EINVAL;
6886                 goto out;
6887         }
6888
6889         /*
6890          * Ok great we all agreed on an extent record, let's go find the real
6891          * references and fix up the ones that don't match.
6892          */
6893         list_for_each_entry(back, &rec->backrefs, list) {
6894                 if (back->full_backref || !back->is_data)
6895                         continue;
6896
6897                 dback = (struct data_backref *)back;
6898
6899                 /*
6900                  * Still ignoring backrefs that don't have a real ref attached
6901                  * to them.
6902                  */
6903                 if (dback->found_ref == 0)
6904                         continue;
6905
6906                 if (dback->bytes == best->bytes &&
6907                     dback->disk_bytenr == best->bytenr)
6908                         continue;
6909
6910                 ret = repair_ref(info, path, dback, best);
6911                 if (ret)
6912                         goto out;
6913         }
6914
6915         /*
6916          * Ok we messed with the actual refs, which means we need to drop our
6917          * entire cache and go back and rescan.  I know this is a huge pain and
6918          * adds a lot of extra work, but it's the only way to be safe.  Once all
6919          * the backrefs agree we may not need to do anything to the extent
6920          * record itself.
6921          */
6922         ret = -EAGAIN;
6923 out:
6924         while (!list_empty(&entries)) {
6925                 entry = list_entry(entries.next, struct extent_entry, list);
6926                 list_del_init(&entry->list);
6927                 free(entry);
6928         }
6929         return ret;
6930 }
6931
6932 static int process_duplicates(struct btrfs_root *root,
6933                               struct cache_tree *extent_cache,
6934                               struct extent_record *rec)
6935 {
6936         struct extent_record *good, *tmp;
6937         struct cache_extent *cache;
6938         int ret;
6939
6940         /*
6941          * If we found a extent record for this extent then return, or if we
6942          * have more than one duplicate we are likely going to need to delete
6943          * something.
6944          */
6945         if (rec->found_rec || rec->num_duplicates > 1)
6946                 return 0;
6947
6948         /* Shouldn't happen but just in case */
6949         BUG_ON(!rec->num_duplicates);
6950
6951         /*
6952          * So this happens if we end up with a backref that doesn't match the
6953          * actual extent entry.  So either the backref is bad or the extent
6954          * entry is bad.  Either way we want to have the extent_record actually
6955          * reflect what we found in the extent_tree, so we need to take the
6956          * duplicate out and use that as the extent_record since the only way we
6957          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6958          */
6959         remove_cache_extent(extent_cache, &rec->cache);
6960
6961         good = list_entry(rec->dups.next, struct extent_record, list);
6962         list_del_init(&good->list);
6963         INIT_LIST_HEAD(&good->backrefs);
6964         INIT_LIST_HEAD(&good->dups);
6965         good->cache.start = good->start;
6966         good->cache.size = good->nr;
6967         good->content_checked = 0;
6968         good->owner_ref_checked = 0;
6969         good->num_duplicates = 0;
6970         good->refs = rec->refs;
6971         list_splice_init(&rec->backrefs, &good->backrefs);
6972         while (1) {
6973                 cache = lookup_cache_extent(extent_cache, good->start,
6974                                             good->nr);
6975                 if (!cache)
6976                         break;
6977                 tmp = container_of(cache, struct extent_record, cache);
6978
6979                 /*
6980                  * If we find another overlapping extent and it's found_rec is
6981                  * set then it's a duplicate and we need to try and delete
6982                  * something.
6983                  */
6984                 if (tmp->found_rec || tmp->num_duplicates > 0) {
6985                         if (list_empty(&good->list))
6986                                 list_add_tail(&good->list,
6987                                               &duplicate_extents);
6988                         good->num_duplicates += tmp->num_duplicates + 1;
6989                         list_splice_init(&tmp->dups, &good->dups);
6990                         list_del_init(&tmp->list);
6991                         list_add_tail(&tmp->list, &good->dups);
6992                         remove_cache_extent(extent_cache, &tmp->cache);
6993                         continue;
6994                 }
6995
6996                 /*
6997                  * Ok we have another non extent item backed extent rec, so lets
6998                  * just add it to this extent and carry on like we did above.
6999                  */
7000                 good->refs += tmp->refs;
7001                 list_splice_init(&tmp->backrefs, &good->backrefs);
7002                 remove_cache_extent(extent_cache, &tmp->cache);
7003                 free(tmp);
7004         }
7005         ret = insert_cache_extent(extent_cache, &good->cache);
7006         BUG_ON(ret);
7007         free(rec);
7008         return good->num_duplicates ? 0 : 1;
7009 }
7010
7011 static int delete_duplicate_records(struct btrfs_root *root,
7012                                     struct extent_record *rec)
7013 {
7014         struct btrfs_trans_handle *trans;
7015         LIST_HEAD(delete_list);
7016         struct btrfs_path *path;
7017         struct extent_record *tmp, *good, *n;
7018         int nr_del = 0;
7019         int ret = 0, err;
7020         struct btrfs_key key;
7021
7022         path = btrfs_alloc_path();
7023         if (!path) {
7024                 ret = -ENOMEM;
7025                 goto out;
7026         }
7027
7028         good = rec;
7029         /* Find the record that covers all of the duplicates. */
7030         list_for_each_entry(tmp, &rec->dups, list) {
7031                 if (good->start < tmp->start)
7032                         continue;
7033                 if (good->nr > tmp->nr)
7034                         continue;
7035
7036                 if (tmp->start + tmp->nr < good->start + good->nr) {
7037                         fprintf(stderr, "Ok we have overlapping extents that "
7038                                 "aren't completely covered by eachother, this "
7039                                 "is going to require more careful thought.  "
7040                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7041                                 tmp->start, tmp->nr, good->start, good->nr);
7042                         abort();
7043                 }
7044                 good = tmp;
7045         }
7046
7047         if (good != rec)
7048                 list_add_tail(&rec->list, &delete_list);
7049
7050         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7051                 if (tmp == good)
7052                         continue;
7053                 list_move_tail(&tmp->list, &delete_list);
7054         }
7055
7056         root = root->fs_info->extent_root;
7057         trans = btrfs_start_transaction(root, 1);
7058         if (IS_ERR(trans)) {
7059                 ret = PTR_ERR(trans);
7060                 goto out;
7061         }
7062
7063         list_for_each_entry(tmp, &delete_list, list) {
7064                 if (tmp->found_rec == 0)
7065                         continue;
7066                 key.objectid = tmp->start;
7067                 key.type = BTRFS_EXTENT_ITEM_KEY;
7068                 key.offset = tmp->nr;
7069
7070                 /* Shouldn't happen but just in case */
7071                 if (tmp->metadata) {
7072                         fprintf(stderr, "Well this shouldn't happen, extent "
7073                                 "record overlaps but is metadata? "
7074                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7075                         abort();
7076                 }
7077
7078                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7079                 if (ret) {
7080                         if (ret > 0)
7081                                 ret = -EINVAL;
7082                         break;
7083                 }
7084                 ret = btrfs_del_item(trans, root, path);
7085                 if (ret)
7086                         break;
7087                 btrfs_release_path(path);
7088                 nr_del++;
7089         }
7090         err = btrfs_commit_transaction(trans, root);
7091         if (err && !ret)
7092                 ret = err;
7093 out:
7094         while (!list_empty(&delete_list)) {
7095                 tmp = list_entry(delete_list.next, struct extent_record, list);
7096                 list_del_init(&tmp->list);
7097                 if (tmp == rec)
7098                         continue;
7099                 free(tmp);
7100         }
7101
7102         while (!list_empty(&rec->dups)) {
7103                 tmp = list_entry(rec->dups.next, struct extent_record, list);
7104                 list_del_init(&tmp->list);
7105                 free(tmp);
7106         }
7107
7108         btrfs_free_path(path);
7109
7110         if (!ret && !nr_del)
7111                 rec->num_duplicates = 0;
7112
7113         return ret ? ret : nr_del;
7114 }
7115
7116 static int find_possible_backrefs(struct btrfs_fs_info *info,
7117                                   struct btrfs_path *path,
7118                                   struct cache_tree *extent_cache,
7119                                   struct extent_record *rec)
7120 {
7121         struct btrfs_root *root;
7122         struct extent_backref *back;
7123         struct data_backref *dback;
7124         struct cache_extent *cache;
7125         struct btrfs_file_extent_item *fi;
7126         struct btrfs_key key;
7127         u64 bytenr, bytes;
7128         int ret;
7129
7130         list_for_each_entry(back, &rec->backrefs, list) {
7131                 /* Don't care about full backrefs (poor unloved backrefs) */
7132                 if (back->full_backref || !back->is_data)
7133                         continue;
7134
7135                 dback = (struct data_backref *)back;
7136
7137                 /* We found this one, we don't need to do a lookup */
7138                 if (dback->found_ref)
7139                         continue;
7140
7141                 key.objectid = dback->root;
7142                 key.type = BTRFS_ROOT_ITEM_KEY;
7143                 key.offset = (u64)-1;
7144
7145                 root = btrfs_read_fs_root(info, &key);
7146
7147                 /* No root, definitely a bad ref, skip */
7148                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7149                         continue;
7150                 /* Other err, exit */
7151                 if (IS_ERR(root))
7152                         return PTR_ERR(root);
7153
7154                 key.objectid = dback->owner;
7155                 key.type = BTRFS_EXTENT_DATA_KEY;
7156                 key.offset = dback->offset;
7157                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7158                 if (ret) {
7159                         btrfs_release_path(path);
7160                         if (ret < 0)
7161                                 return ret;
7162                         /* Didn't find it, we can carry on */
7163                         ret = 0;
7164                         continue;
7165                 }
7166
7167                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7168                                     struct btrfs_file_extent_item);
7169                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7170                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7171                 btrfs_release_path(path);
7172                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7173                 if (cache) {
7174                         struct extent_record *tmp;
7175                         tmp = container_of(cache, struct extent_record, cache);
7176
7177                         /*
7178                          * If we found an extent record for the bytenr for this
7179                          * particular backref then we can't add it to our
7180                          * current extent record.  We only want to add backrefs
7181                          * that don't have a corresponding extent item in the
7182                          * extent tree since they likely belong to this record
7183                          * and we need to fix it if it doesn't match bytenrs.
7184                          */
7185                         if  (tmp->found_rec)
7186                                 continue;
7187                 }
7188
7189                 dback->found_ref += 1;
7190                 dback->disk_bytenr = bytenr;
7191                 dback->bytes = bytes;
7192
7193                 /*
7194                  * Set this so the verify backref code knows not to trust the
7195                  * values in this backref.
7196                  */
7197                 back->broken = 1;
7198         }
7199
7200         return 0;
7201 }
7202
7203 /*
7204  * Record orphan data ref into corresponding root.
7205  *
7206  * Return 0 if the extent item contains data ref and recorded.
7207  * Return 1 if the extent item contains no useful data ref
7208  *   On that case, it may contains only shared_dataref or metadata backref
7209  *   or the file extent exists(this should be handled by the extent bytenr
7210  *   recovery routine)
7211  * Return <0 if something goes wrong.
7212  */
7213 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7214                                       struct extent_record *rec)
7215 {
7216         struct btrfs_key key;
7217         struct btrfs_root *dest_root;
7218         struct extent_backref *back;
7219         struct data_backref *dback;
7220         struct orphan_data_extent *orphan;
7221         struct btrfs_path *path;
7222         int recorded_data_ref = 0;
7223         int ret = 0;
7224
7225         if (rec->metadata)
7226                 return 1;
7227         path = btrfs_alloc_path();
7228         if (!path)
7229                 return -ENOMEM;
7230         list_for_each_entry(back, &rec->backrefs, list) {
7231                 if (back->full_backref || !back->is_data ||
7232                     !back->found_extent_tree)
7233                         continue;
7234                 dback = (struct data_backref *)back;
7235                 if (dback->found_ref)
7236                         continue;
7237                 key.objectid = dback->root;
7238                 key.type = BTRFS_ROOT_ITEM_KEY;
7239                 key.offset = (u64)-1;
7240
7241                 dest_root = btrfs_read_fs_root(fs_info, &key);
7242
7243                 /* For non-exist root we just skip it */
7244                 if (IS_ERR(dest_root) || !dest_root)
7245                         continue;
7246
7247                 key.objectid = dback->owner;
7248                 key.type = BTRFS_EXTENT_DATA_KEY;
7249                 key.offset = dback->offset;
7250
7251                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7252                 /*
7253                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7254                  * we need to record it for inode/file extent rebuild.
7255                  * For ret > 0, we record it only for file extent rebuild.
7256                  * For ret == 0, the file extent exists but only bytenr
7257                  * mismatch, let the original bytenr fix routine to handle,
7258                  * don't record it.
7259                  */
7260                 if (ret == 0)
7261                         continue;
7262                 ret = 0;
7263                 orphan = malloc(sizeof(*orphan));
7264                 if (!orphan) {
7265                         ret = -ENOMEM;
7266                         goto out;
7267                 }
7268                 INIT_LIST_HEAD(&orphan->list);
7269                 orphan->root = dback->root;
7270                 orphan->objectid = dback->owner;
7271                 orphan->offset = dback->offset;
7272                 orphan->disk_bytenr = rec->cache.start;
7273                 orphan->disk_len = rec->cache.size;
7274                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7275                 recorded_data_ref = 1;
7276         }
7277 out:
7278         btrfs_free_path(path);
7279         if (!ret)
7280                 return !recorded_data_ref;
7281         else
7282                 return ret;
7283 }
7284
7285 /*
7286  * when an incorrect extent item is found, this will delete
7287  * all of the existing entries for it and recreate them
7288  * based on what the tree scan found.
7289  */
7290 static int fixup_extent_refs(struct btrfs_fs_info *info,
7291                              struct cache_tree *extent_cache,
7292                              struct extent_record *rec)
7293 {
7294         struct btrfs_trans_handle *trans = NULL;
7295         int ret;
7296         struct btrfs_path *path;
7297         struct list_head *cur = rec->backrefs.next;
7298         struct cache_extent *cache;
7299         struct extent_backref *back;
7300         int allocated = 0;
7301         u64 flags = 0;
7302
7303         if (rec->flag_block_full_backref)
7304                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7305
7306         path = btrfs_alloc_path();
7307         if (!path)
7308                 return -ENOMEM;
7309
7310         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7311                 /*
7312                  * Sometimes the backrefs themselves are so broken they don't
7313                  * get attached to any meaningful rec, so first go back and
7314                  * check any of our backrefs that we couldn't find and throw
7315                  * them into the list if we find the backref so that
7316                  * verify_backrefs can figure out what to do.
7317                  */
7318                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7319                 if (ret < 0)
7320                         goto out;
7321         }
7322
7323         /* step one, make sure all of the backrefs agree */
7324         ret = verify_backrefs(info, path, rec);
7325         if (ret < 0)
7326                 goto out;
7327
7328         trans = btrfs_start_transaction(info->extent_root, 1);
7329         if (IS_ERR(trans)) {
7330                 ret = PTR_ERR(trans);
7331                 goto out;
7332         }
7333
7334         /* step two, delete all the existing records */
7335         ret = delete_extent_records(trans, info->extent_root, path,
7336                                     rec->start, rec->max_size);
7337
7338         if (ret < 0)
7339                 goto out;
7340
7341         /* was this block corrupt?  If so, don't add references to it */
7342         cache = lookup_cache_extent(info->corrupt_blocks,
7343                                     rec->start, rec->max_size);
7344         if (cache) {
7345                 ret = 0;
7346                 goto out;
7347         }
7348
7349         /* step three, recreate all the refs we did find */
7350         while(cur != &rec->backrefs) {
7351                 back = list_entry(cur, struct extent_backref, list);
7352                 cur = cur->next;
7353
7354                 /*
7355                  * if we didn't find any references, don't create a
7356                  * new extent record
7357                  */
7358                 if (!back->found_ref)
7359                         continue;
7360
7361                 rec->bad_full_backref = 0;
7362                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7363                 allocated = 1;
7364
7365                 if (ret)
7366                         goto out;
7367         }
7368 out:
7369         if (trans) {
7370                 int err = btrfs_commit_transaction(trans, info->extent_root);
7371                 if (!ret)
7372                         ret = err;
7373         }
7374
7375         btrfs_free_path(path);
7376         return ret;
7377 }
7378
7379 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7380                               struct extent_record *rec)
7381 {
7382         struct btrfs_trans_handle *trans;
7383         struct btrfs_root *root = fs_info->extent_root;
7384         struct btrfs_path *path;
7385         struct btrfs_extent_item *ei;
7386         struct btrfs_key key;
7387         u64 flags;
7388         int ret = 0;
7389
7390         key.objectid = rec->start;
7391         if (rec->metadata) {
7392                 key.type = BTRFS_METADATA_ITEM_KEY;
7393                 key.offset = rec->info_level;
7394         } else {
7395                 key.type = BTRFS_EXTENT_ITEM_KEY;
7396                 key.offset = rec->max_size;
7397         }
7398
7399         path = btrfs_alloc_path();
7400         if (!path)
7401                 return -ENOMEM;
7402
7403         trans = btrfs_start_transaction(root, 0);
7404         if (IS_ERR(trans)) {
7405                 btrfs_free_path(path);
7406                 return PTR_ERR(trans);
7407         }
7408
7409         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7410         if (ret < 0) {
7411                 btrfs_free_path(path);
7412                 btrfs_commit_transaction(trans, root);
7413                 return ret;
7414         } else if (ret) {
7415                 fprintf(stderr, "Didn't find extent for %llu\n",
7416                         (unsigned long long)rec->start);
7417                 btrfs_free_path(path);
7418                 btrfs_commit_transaction(trans, root);
7419                 return -ENOENT;
7420         }
7421
7422         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7423                             struct btrfs_extent_item);
7424         flags = btrfs_extent_flags(path->nodes[0], ei);
7425         if (rec->flag_block_full_backref) {
7426                 fprintf(stderr, "setting full backref on %llu\n",
7427                         (unsigned long long)key.objectid);
7428                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7429         } else {
7430                 fprintf(stderr, "clearing full backref on %llu\n",
7431                         (unsigned long long)key.objectid);
7432                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7433         }
7434         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7435         btrfs_mark_buffer_dirty(path->nodes[0]);
7436         btrfs_free_path(path);
7437         return btrfs_commit_transaction(trans, root);
7438 }
7439
7440 /* right now we only prune from the extent allocation tree */
7441 static int prune_one_block(struct btrfs_trans_handle *trans,
7442                            struct btrfs_fs_info *info,
7443                            struct btrfs_corrupt_block *corrupt)
7444 {
7445         int ret;
7446         struct btrfs_path path;
7447         struct extent_buffer *eb;
7448         u64 found;
7449         int slot;
7450         int nritems;
7451         int level = corrupt->level + 1;
7452
7453         btrfs_init_path(&path);
7454 again:
7455         /* we want to stop at the parent to our busted block */
7456         path.lowest_level = level;
7457
7458         ret = btrfs_search_slot(trans, info->extent_root,
7459                                 &corrupt->key, &path, -1, 1);
7460
7461         if (ret < 0)
7462                 goto out;
7463
7464         eb = path.nodes[level];
7465         if (!eb) {
7466                 ret = -ENOENT;
7467                 goto out;
7468         }
7469
7470         /*
7471          * hopefully the search gave us the block we want to prune,
7472          * lets try that first
7473          */
7474         slot = path.slots[level];
7475         found =  btrfs_node_blockptr(eb, slot);
7476         if (found == corrupt->cache.start)
7477                 goto del_ptr;
7478
7479         nritems = btrfs_header_nritems(eb);
7480
7481         /* the search failed, lets scan this node and hope we find it */
7482         for (slot = 0; slot < nritems; slot++) {
7483                 found =  btrfs_node_blockptr(eb, slot);
7484                 if (found == corrupt->cache.start)
7485                         goto del_ptr;
7486         }
7487         /*
7488          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7489          * to this block
7490          */
7491         if (eb == info->extent_root->node) {
7492                 ret = -ENOENT;
7493                 goto out;
7494         } else {
7495                 level++;
7496                 btrfs_release_path(&path);
7497                 goto again;
7498         }
7499
7500 del_ptr:
7501         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7502         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7503
7504 out:
7505         btrfs_release_path(&path);
7506         return ret;
7507 }
7508
7509 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7510 {
7511         struct btrfs_trans_handle *trans = NULL;
7512         struct cache_extent *cache;
7513         struct btrfs_corrupt_block *corrupt;
7514
7515         while (1) {
7516                 cache = search_cache_extent(info->corrupt_blocks, 0);
7517                 if (!cache)
7518                         break;
7519                 if (!trans) {
7520                         trans = btrfs_start_transaction(info->extent_root, 1);
7521                         if (IS_ERR(trans))
7522                                 return PTR_ERR(trans);
7523                 }
7524                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7525                 prune_one_block(trans, info, corrupt);
7526                 remove_cache_extent(info->corrupt_blocks, cache);
7527         }
7528         if (trans)
7529                 return btrfs_commit_transaction(trans, info->extent_root);
7530         return 0;
7531 }
7532
7533 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7534 {
7535         struct btrfs_block_group_cache *cache;
7536         u64 start, end;
7537         int ret;
7538
7539         while (1) {
7540                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7541                                             &start, &end, EXTENT_DIRTY);
7542                 if (ret)
7543                         break;
7544                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7545                                    GFP_NOFS);
7546         }
7547
7548         start = 0;
7549         while (1) {
7550                 cache = btrfs_lookup_first_block_group(fs_info, start);
7551                 if (!cache)
7552                         break;
7553                 if (cache->cached)
7554                         cache->cached = 0;
7555                 start = cache->key.objectid + cache->key.offset;
7556         }
7557 }
7558
7559 static int check_extent_refs(struct btrfs_root *root,
7560                              struct cache_tree *extent_cache)
7561 {
7562         struct extent_record *rec;
7563         struct cache_extent *cache;
7564         int err = 0;
7565         int ret = 0;
7566         int fixed = 0;
7567         int had_dups = 0;
7568         int recorded = 0;
7569
7570         if (repair) {
7571                 /*
7572                  * if we're doing a repair, we have to make sure
7573                  * we don't allocate from the problem extents.
7574                  * In the worst case, this will be all the
7575                  * extents in the FS
7576                  */
7577                 cache = search_cache_extent(extent_cache, 0);
7578                 while(cache) {
7579                         rec = container_of(cache, struct extent_record, cache);
7580                         set_extent_dirty(root->fs_info->excluded_extents,
7581                                          rec->start,
7582                                          rec->start + rec->max_size - 1,
7583                                          GFP_NOFS);
7584                         cache = next_cache_extent(cache);
7585                 }
7586
7587                 /* pin down all the corrupted blocks too */
7588                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7589                 while(cache) {
7590                         set_extent_dirty(root->fs_info->excluded_extents,
7591                                          cache->start,
7592                                          cache->start + cache->size - 1,
7593                                          GFP_NOFS);
7594                         cache = next_cache_extent(cache);
7595                 }
7596                 prune_corrupt_blocks(root->fs_info);
7597                 reset_cached_block_groups(root->fs_info);
7598         }
7599
7600         reset_cached_block_groups(root->fs_info);
7601
7602         /*
7603          * We need to delete any duplicate entries we find first otherwise we
7604          * could mess up the extent tree when we have backrefs that actually
7605          * belong to a different extent item and not the weird duplicate one.
7606          */
7607         while (repair && !list_empty(&duplicate_extents)) {
7608                 rec = list_entry(duplicate_extents.next, struct extent_record,
7609                                  list);
7610                 list_del_init(&rec->list);
7611
7612                 /* Sometimes we can find a backref before we find an actual
7613                  * extent, so we need to process it a little bit to see if there
7614                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7615                  * if this is a backref screwup.  If we need to delete stuff
7616                  * process_duplicates() will return 0, otherwise it will return
7617                  * 1 and we
7618                  */
7619                 if (process_duplicates(root, extent_cache, rec))
7620                         continue;
7621                 ret = delete_duplicate_records(root, rec);
7622                 if (ret < 0)
7623                         return ret;
7624                 /*
7625                  * delete_duplicate_records will return the number of entries
7626                  * deleted, so if it's greater than 0 then we know we actually
7627                  * did something and we need to remove.
7628                  */
7629                 if (ret)
7630                         had_dups = 1;
7631         }
7632
7633         if (had_dups)
7634                 return -EAGAIN;
7635
7636         while(1) {
7637                 int cur_err = 0;
7638
7639                 fixed = 0;
7640                 recorded = 0;
7641                 cache = search_cache_extent(extent_cache, 0);
7642                 if (!cache)
7643                         break;
7644                 rec = container_of(cache, struct extent_record, cache);
7645                 if (rec->num_duplicates) {
7646                         fprintf(stderr, "extent item %llu has multiple extent "
7647                                 "items\n", (unsigned long long)rec->start);
7648                         err = 1;
7649                         cur_err = 1;
7650                 }
7651
7652                 if (rec->refs != rec->extent_item_refs) {
7653                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7654                                 (unsigned long long)rec->start,
7655                                 (unsigned long long)rec->nr);
7656                         fprintf(stderr, "extent item %llu, found %llu\n",
7657                                 (unsigned long long)rec->extent_item_refs,
7658                                 (unsigned long long)rec->refs);
7659                         ret = record_orphan_data_extents(root->fs_info, rec);
7660                         if (ret < 0)
7661                                 goto repair_abort;
7662                         if (ret == 0) {
7663                                 recorded = 1;
7664                         } else {
7665                                 /*
7666                                  * we can't use the extent to repair file
7667                                  * extent, let the fallback method handle it.
7668                                  */
7669                                 if (!fixed && repair) {
7670                                         ret = fixup_extent_refs(
7671                                                         root->fs_info,
7672                                                         extent_cache, rec);
7673                                         if (ret)
7674                                                 goto repair_abort;
7675                                         fixed = 1;
7676                                 }
7677                         }
7678                         err = 1;
7679                         cur_err = 1;
7680                 }
7681                 if (all_backpointers_checked(rec, 1)) {
7682                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7683                                 (unsigned long long)rec->start,
7684                                 (unsigned long long)rec->nr);
7685
7686                         if (!fixed && !recorded && repair) {
7687                                 ret = fixup_extent_refs(root->fs_info,
7688                                                         extent_cache, rec);
7689                                 if (ret)
7690                                         goto repair_abort;
7691                                 fixed = 1;
7692                         }
7693                         cur_err = 1;
7694                         err = 1;
7695                 }
7696                 if (!rec->owner_ref_checked) {
7697                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7698                                 (unsigned long long)rec->start,
7699                                 (unsigned long long)rec->nr);
7700                         if (!fixed && !recorded && repair) {
7701                                 ret = fixup_extent_refs(root->fs_info,
7702                                                         extent_cache, rec);
7703                                 if (ret)
7704                                         goto repair_abort;
7705                                 fixed = 1;
7706                         }
7707                         err = 1;
7708                         cur_err = 1;
7709                 }
7710                 if (rec->bad_full_backref) {
7711                         fprintf(stderr, "bad full backref, on [%llu]\n",
7712                                 (unsigned long long)rec->start);
7713                         if (repair) {
7714                                 ret = fixup_extent_flags(root->fs_info, rec);
7715                                 if (ret)
7716                                         goto repair_abort;
7717                                 fixed = 1;
7718                         }
7719                         err = 1;
7720                         cur_err = 1;
7721                 }
7722                 /*
7723                  * Although it's not a extent ref's problem, we reuse this
7724                  * routine for error reporting.
7725                  * No repair function yet.
7726                  */
7727                 if (rec->crossing_stripes) {
7728                         fprintf(stderr,
7729                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7730                                 rec->start, rec->start + rec->max_size);
7731                         err = 1;
7732                         cur_err = 1;
7733                 }
7734
7735                 if (rec->wrong_chunk_type) {
7736                         fprintf(stderr,
7737                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7738                                 rec->start, rec->start + rec->max_size);
7739                         err = 1;
7740                         cur_err = 1;
7741                 }
7742
7743                 remove_cache_extent(extent_cache, cache);
7744                 free_all_extent_backrefs(rec);
7745                 if (!init_extent_tree && repair && (!cur_err || fixed))
7746                         clear_extent_dirty(root->fs_info->excluded_extents,
7747                                            rec->start,
7748                                            rec->start + rec->max_size - 1,
7749                                            GFP_NOFS);
7750                 free(rec);
7751         }
7752 repair_abort:
7753         if (repair) {
7754                 if (ret && ret != -EAGAIN) {
7755                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7756                         exit(1);
7757                 } else if (!ret) {
7758                         struct btrfs_trans_handle *trans;
7759
7760                         root = root->fs_info->extent_root;
7761                         trans = btrfs_start_transaction(root, 1);
7762                         if (IS_ERR(trans)) {
7763                                 ret = PTR_ERR(trans);
7764                                 goto repair_abort;
7765                         }
7766
7767                         btrfs_fix_block_accounting(trans, root);
7768                         ret = btrfs_commit_transaction(trans, root);
7769                         if (ret)
7770                                 goto repair_abort;
7771                 }
7772                 if (err)
7773                         fprintf(stderr, "repaired damaged extent references\n");
7774                 return ret;
7775         }
7776         return err;
7777 }
7778
7779 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7780 {
7781         u64 stripe_size;
7782
7783         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7784                 stripe_size = length;
7785                 stripe_size /= num_stripes;
7786         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7787                 stripe_size = length * 2;
7788                 stripe_size /= num_stripes;
7789         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7790                 stripe_size = length;
7791                 stripe_size /= (num_stripes - 1);
7792         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7793                 stripe_size = length;
7794                 stripe_size /= (num_stripes - 2);
7795         } else {
7796                 stripe_size = length;
7797         }
7798         return stripe_size;
7799 }
7800
7801 /*
7802  * Check the chunk with its block group/dev list ref:
7803  * Return 0 if all refs seems valid.
7804  * Return 1 if part of refs seems valid, need later check for rebuild ref
7805  * like missing block group and needs to search extent tree to rebuild them.
7806  * Return -1 if essential refs are missing and unable to rebuild.
7807  */
7808 static int check_chunk_refs(struct chunk_record *chunk_rec,
7809                             struct block_group_tree *block_group_cache,
7810                             struct device_extent_tree *dev_extent_cache,
7811                             int silent)
7812 {
7813         struct cache_extent *block_group_item;
7814         struct block_group_record *block_group_rec;
7815         struct cache_extent *dev_extent_item;
7816         struct device_extent_record *dev_extent_rec;
7817         u64 devid;
7818         u64 offset;
7819         u64 length;
7820         int metadump_v2 = 0;
7821         int i;
7822         int ret = 0;
7823
7824         block_group_item = lookup_cache_extent(&block_group_cache->tree,
7825                                                chunk_rec->offset,
7826                                                chunk_rec->length);
7827         if (block_group_item) {
7828                 block_group_rec = container_of(block_group_item,
7829                                                struct block_group_record,
7830                                                cache);
7831                 if (chunk_rec->length != block_group_rec->offset ||
7832                     chunk_rec->offset != block_group_rec->objectid ||
7833                     (!metadump_v2 &&
7834                      chunk_rec->type_flags != block_group_rec->flags)) {
7835                         if (!silent)
7836                                 fprintf(stderr,
7837                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7838                                         chunk_rec->objectid,
7839                                         chunk_rec->type,
7840                                         chunk_rec->offset,
7841                                         chunk_rec->length,
7842                                         chunk_rec->offset,
7843                                         chunk_rec->type_flags,
7844                                         block_group_rec->objectid,
7845                                         block_group_rec->type,
7846                                         block_group_rec->offset,
7847                                         block_group_rec->offset,
7848                                         block_group_rec->objectid,
7849                                         block_group_rec->flags);
7850                         ret = -1;
7851                 } else {
7852                         list_del_init(&block_group_rec->list);
7853                         chunk_rec->bg_rec = block_group_rec;
7854                 }
7855         } else {
7856                 if (!silent)
7857                         fprintf(stderr,
7858                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7859                                 chunk_rec->objectid,
7860                                 chunk_rec->type,
7861                                 chunk_rec->offset,
7862                                 chunk_rec->length,
7863                                 chunk_rec->offset,
7864                                 chunk_rec->type_flags);
7865                 ret = 1;
7866         }
7867
7868         if (metadump_v2)
7869                 return ret;
7870
7871         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7872                                     chunk_rec->num_stripes);
7873         for (i = 0; i < chunk_rec->num_stripes; ++i) {
7874                 devid = chunk_rec->stripes[i].devid;
7875                 offset = chunk_rec->stripes[i].offset;
7876                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7877                                                        devid, offset, length);
7878                 if (dev_extent_item) {
7879                         dev_extent_rec = container_of(dev_extent_item,
7880                                                 struct device_extent_record,
7881                                                 cache);
7882                         if (dev_extent_rec->objectid != devid ||
7883                             dev_extent_rec->offset != offset ||
7884                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
7885                             dev_extent_rec->length != length) {
7886                                 if (!silent)
7887                                         fprintf(stderr,
7888                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7889                                                 chunk_rec->objectid,
7890                                                 chunk_rec->type,
7891                                                 chunk_rec->offset,
7892                                                 chunk_rec->stripes[i].devid,
7893                                                 chunk_rec->stripes[i].offset,
7894                                                 dev_extent_rec->objectid,
7895                                                 dev_extent_rec->offset,
7896                                                 dev_extent_rec->length);
7897                                 ret = -1;
7898                         } else {
7899                                 list_move(&dev_extent_rec->chunk_list,
7900                                           &chunk_rec->dextents);
7901                         }
7902                 } else {
7903                         if (!silent)
7904                                 fprintf(stderr,
7905                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7906                                         chunk_rec->objectid,
7907                                         chunk_rec->type,
7908                                         chunk_rec->offset,
7909                                         chunk_rec->stripes[i].devid,
7910                                         chunk_rec->stripes[i].offset);
7911                         ret = -1;
7912                 }
7913         }
7914         return ret;
7915 }
7916
7917 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7918 int check_chunks(struct cache_tree *chunk_cache,
7919                  struct block_group_tree *block_group_cache,
7920                  struct device_extent_tree *dev_extent_cache,
7921                  struct list_head *good, struct list_head *bad,
7922                  struct list_head *rebuild, int silent)
7923 {
7924         struct cache_extent *chunk_item;
7925         struct chunk_record *chunk_rec;
7926         struct block_group_record *bg_rec;
7927         struct device_extent_record *dext_rec;
7928         int err;
7929         int ret = 0;
7930
7931         chunk_item = first_cache_extent(chunk_cache);
7932         while (chunk_item) {
7933                 chunk_rec = container_of(chunk_item, struct chunk_record,
7934                                          cache);
7935                 err = check_chunk_refs(chunk_rec, block_group_cache,
7936                                        dev_extent_cache, silent);
7937                 if (err < 0)
7938                         ret = err;
7939                 if (err == 0 && good)
7940                         list_add_tail(&chunk_rec->list, good);
7941                 if (err > 0 && rebuild)
7942                         list_add_tail(&chunk_rec->list, rebuild);
7943                 if (err < 0 && bad)
7944                         list_add_tail(&chunk_rec->list, bad);
7945                 chunk_item = next_cache_extent(chunk_item);
7946         }
7947
7948         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7949                 if (!silent)
7950                         fprintf(stderr,
7951                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7952                                 bg_rec->objectid,
7953                                 bg_rec->offset,
7954                                 bg_rec->flags);
7955                 if (!ret)
7956                         ret = 1;
7957         }
7958
7959         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7960                             chunk_list) {
7961                 if (!silent)
7962                         fprintf(stderr,
7963                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7964                                 dext_rec->objectid,
7965                                 dext_rec->offset,
7966                                 dext_rec->length);
7967                 if (!ret)
7968                         ret = 1;
7969         }
7970         return ret;
7971 }
7972
7973
7974 static int check_device_used(struct device_record *dev_rec,
7975                              struct device_extent_tree *dext_cache)
7976 {
7977         struct cache_extent *cache;
7978         struct device_extent_record *dev_extent_rec;
7979         u64 total_byte = 0;
7980
7981         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7982         while (cache) {
7983                 dev_extent_rec = container_of(cache,
7984                                               struct device_extent_record,
7985                                               cache);
7986                 if (dev_extent_rec->objectid != dev_rec->devid)
7987                         break;
7988
7989                 list_del_init(&dev_extent_rec->device_list);
7990                 total_byte += dev_extent_rec->length;
7991                 cache = next_cache_extent(cache);
7992         }
7993
7994         if (total_byte != dev_rec->byte_used) {
7995                 fprintf(stderr,
7996                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7997                         total_byte, dev_rec->byte_used, dev_rec->objectid,
7998                         dev_rec->type, dev_rec->offset);
7999                 return -1;
8000         } else {
8001                 return 0;
8002         }
8003 }
8004
8005 /* check btrfs_dev_item -> btrfs_dev_extent */
8006 static int check_devices(struct rb_root *dev_cache,
8007                          struct device_extent_tree *dev_extent_cache)
8008 {
8009         struct rb_node *dev_node;
8010         struct device_record *dev_rec;
8011         struct device_extent_record *dext_rec;
8012         int err;
8013         int ret = 0;
8014
8015         dev_node = rb_first(dev_cache);
8016         while (dev_node) {
8017                 dev_rec = container_of(dev_node, struct device_record, node);
8018                 err = check_device_used(dev_rec, dev_extent_cache);
8019                 if (err)
8020                         ret = err;
8021
8022                 dev_node = rb_next(dev_node);
8023         }
8024         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8025                             device_list) {
8026                 fprintf(stderr,
8027                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8028                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8029                 if (!ret)
8030                         ret = 1;
8031         }
8032         return ret;
8033 }
8034
8035 static int add_root_item_to_list(struct list_head *head,
8036                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8037                                   u8 level, u8 drop_level,
8038                                   int level_size, struct btrfs_key *drop_key)
8039 {
8040
8041         struct root_item_record *ri_rec;
8042         ri_rec = malloc(sizeof(*ri_rec));
8043         if (!ri_rec)
8044                 return -ENOMEM;
8045         ri_rec->bytenr = bytenr;
8046         ri_rec->objectid = objectid;
8047         ri_rec->level = level;
8048         ri_rec->level_size = level_size;
8049         ri_rec->drop_level = drop_level;
8050         ri_rec->last_snapshot = last_snapshot;
8051         if (drop_key)
8052                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8053         list_add_tail(&ri_rec->list, head);
8054
8055         return 0;
8056 }
8057
8058 static void free_root_item_list(struct list_head *list)
8059 {
8060         struct root_item_record *ri_rec;
8061
8062         while (!list_empty(list)) {
8063                 ri_rec = list_first_entry(list, struct root_item_record,
8064                                           list);
8065                 list_del_init(&ri_rec->list);
8066                 free(ri_rec);
8067         }
8068 }
8069
8070 static int deal_root_from_list(struct list_head *list,
8071                                struct btrfs_root *root,
8072                                struct block_info *bits,
8073                                int bits_nr,
8074                                struct cache_tree *pending,
8075                                struct cache_tree *seen,
8076                                struct cache_tree *reada,
8077                                struct cache_tree *nodes,
8078                                struct cache_tree *extent_cache,
8079                                struct cache_tree *chunk_cache,
8080                                struct rb_root *dev_cache,
8081                                struct block_group_tree *block_group_cache,
8082                                struct device_extent_tree *dev_extent_cache)
8083 {
8084         int ret = 0;
8085         u64 last;
8086
8087         while (!list_empty(list)) {
8088                 struct root_item_record *rec;
8089                 struct extent_buffer *buf;
8090                 rec = list_entry(list->next,
8091                                  struct root_item_record, list);
8092                 last = 0;
8093                 buf = read_tree_block(root->fs_info->tree_root,
8094                                       rec->bytenr, rec->level_size, 0);
8095                 if (!extent_buffer_uptodate(buf)) {
8096                         free_extent_buffer(buf);
8097                         ret = -EIO;
8098                         break;
8099                 }
8100                 add_root_to_pending(buf, extent_cache, pending,
8101                                     seen, nodes, rec->objectid);
8102                 /*
8103                  * To rebuild extent tree, we need deal with snapshot
8104                  * one by one, otherwise we deal with node firstly which
8105                  * can maximize readahead.
8106                  */
8107                 while (1) {
8108                         ret = run_next_block(root, bits, bits_nr, &last,
8109                                              pending, seen, reada, nodes,
8110                                              extent_cache, chunk_cache,
8111                                              dev_cache, block_group_cache,
8112                                              dev_extent_cache, rec);
8113                         if (ret != 0)
8114                                 break;
8115                 }
8116                 free_extent_buffer(buf);
8117                 list_del(&rec->list);
8118                 free(rec);
8119                 if (ret < 0)
8120                         break;
8121         }
8122         while (ret >= 0) {
8123                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8124                                      reada, nodes, extent_cache, chunk_cache,
8125                                      dev_cache, block_group_cache,
8126                                      dev_extent_cache, NULL);
8127                 if (ret != 0) {
8128                         if (ret > 0)
8129                                 ret = 0;
8130                         break;
8131                 }
8132         }
8133         return ret;
8134 }
8135
8136 static int check_chunks_and_extents(struct btrfs_root *root)
8137 {
8138         struct rb_root dev_cache;
8139         struct cache_tree chunk_cache;
8140         struct block_group_tree block_group_cache;
8141         struct device_extent_tree dev_extent_cache;
8142         struct cache_tree extent_cache;
8143         struct cache_tree seen;
8144         struct cache_tree pending;
8145         struct cache_tree reada;
8146         struct cache_tree nodes;
8147         struct extent_io_tree excluded_extents;
8148         struct cache_tree corrupt_blocks;
8149         struct btrfs_path path;
8150         struct btrfs_key key;
8151         struct btrfs_key found_key;
8152         int ret, err = 0;
8153         struct block_info *bits;
8154         int bits_nr;
8155         struct extent_buffer *leaf;
8156         int slot;
8157         struct btrfs_root_item ri;
8158         struct list_head dropping_trees;
8159         struct list_head normal_trees;
8160         struct btrfs_root *root1;
8161         u64 objectid;
8162         u32 level_size;
8163         u8 level;
8164
8165         dev_cache = RB_ROOT;
8166         cache_tree_init(&chunk_cache);
8167         block_group_tree_init(&block_group_cache);
8168         device_extent_tree_init(&dev_extent_cache);
8169
8170         cache_tree_init(&extent_cache);
8171         cache_tree_init(&seen);
8172         cache_tree_init(&pending);
8173         cache_tree_init(&nodes);
8174         cache_tree_init(&reada);
8175         cache_tree_init(&corrupt_blocks);
8176         extent_io_tree_init(&excluded_extents);
8177         INIT_LIST_HEAD(&dropping_trees);
8178         INIT_LIST_HEAD(&normal_trees);
8179
8180         if (repair) {
8181                 root->fs_info->excluded_extents = &excluded_extents;
8182                 root->fs_info->fsck_extent_cache = &extent_cache;
8183                 root->fs_info->free_extent_hook = free_extent_hook;
8184                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8185         }
8186
8187         bits_nr = 1024;
8188         bits = malloc(bits_nr * sizeof(struct block_info));
8189         if (!bits) {
8190                 perror("malloc");
8191                 exit(1);
8192         }
8193
8194         if (ctx.progress_enabled) {
8195                 ctx.tp = TASK_EXTENTS;
8196                 task_start(ctx.info);
8197         }
8198
8199 again:
8200         root1 = root->fs_info->tree_root;
8201         level = btrfs_header_level(root1->node);
8202         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8203                                     root1->node->start, 0, level, 0,
8204                                     btrfs_level_size(root1, level), NULL);
8205         if (ret < 0)
8206                 goto out;
8207         root1 = root->fs_info->chunk_root;
8208         level = btrfs_header_level(root1->node);
8209         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8210                                     root1->node->start, 0, level, 0,
8211                                     btrfs_level_size(root1, level), NULL);
8212         if (ret < 0)
8213                 goto out;
8214         btrfs_init_path(&path);
8215         key.offset = 0;
8216         key.objectid = 0;
8217         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8218         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8219                                         &key, &path, 0, 0);
8220         if (ret < 0)
8221                 goto out;
8222         while(1) {
8223                 leaf = path.nodes[0];
8224                 slot = path.slots[0];
8225                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8226                         ret = btrfs_next_leaf(root, &path);
8227                         if (ret != 0)
8228                                 break;
8229                         leaf = path.nodes[0];
8230                         slot = path.slots[0];
8231                 }
8232                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8233                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8234                         unsigned long offset;
8235                         u64 last_snapshot;
8236
8237                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8238                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8239                         last_snapshot = btrfs_root_last_snapshot(&ri);
8240                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8241                                 level = btrfs_root_level(&ri);
8242                                 level_size = btrfs_level_size(root, level);
8243                                 ret = add_root_item_to_list(&normal_trees,
8244                                                 found_key.objectid,
8245                                                 btrfs_root_bytenr(&ri),
8246                                                 last_snapshot, level,
8247                                                 0, level_size, NULL);
8248                                 if (ret < 0)
8249                                         goto out;
8250                         } else {
8251                                 level = btrfs_root_level(&ri);
8252                                 level_size = btrfs_level_size(root, level);
8253                                 objectid = found_key.objectid;
8254                                 btrfs_disk_key_to_cpu(&found_key,
8255                                                       &ri.drop_progress);
8256                                 ret = add_root_item_to_list(&dropping_trees,
8257                                                 objectid,
8258                                                 btrfs_root_bytenr(&ri),
8259                                                 last_snapshot, level,
8260                                                 ri.drop_level,
8261                                                 level_size, &found_key);
8262                                 if (ret < 0)
8263                                         goto out;
8264                         }
8265                 }
8266                 path.slots[0]++;
8267         }
8268         btrfs_release_path(&path);
8269
8270         /*
8271          * check_block can return -EAGAIN if it fixes something, please keep
8272          * this in mind when dealing with return values from these functions, if
8273          * we get -EAGAIN we want to fall through and restart the loop.
8274          */
8275         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8276                                   &seen, &reada, &nodes, &extent_cache,
8277                                   &chunk_cache, &dev_cache, &block_group_cache,
8278                                   &dev_extent_cache);
8279         if (ret < 0) {
8280                 if (ret == -EAGAIN)
8281                         goto loop;
8282                 goto out;
8283         }
8284         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8285                                   &pending, &seen, &reada, &nodes,
8286                                   &extent_cache, &chunk_cache, &dev_cache,
8287                                   &block_group_cache, &dev_extent_cache);
8288         if (ret < 0) {
8289                 if (ret == -EAGAIN)
8290                         goto loop;
8291                 goto out;
8292         }
8293
8294         ret = check_chunks(&chunk_cache, &block_group_cache,
8295                            &dev_extent_cache, NULL, NULL, NULL, 0);
8296         if (ret) {
8297                 if (ret == -EAGAIN)
8298                         goto loop;
8299                 err = ret;
8300         }
8301
8302         ret = check_extent_refs(root, &extent_cache);
8303         if (ret < 0) {
8304                 if (ret == -EAGAIN)
8305                         goto loop;
8306                 goto out;
8307         }
8308
8309         ret = check_devices(&dev_cache, &dev_extent_cache);
8310         if (ret && err)
8311                 ret = err;
8312
8313 out:
8314         task_stop(ctx.info);
8315         if (repair) {
8316                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8317                 extent_io_tree_cleanup(&excluded_extents);
8318                 root->fs_info->fsck_extent_cache = NULL;
8319                 root->fs_info->free_extent_hook = NULL;
8320                 root->fs_info->corrupt_blocks = NULL;
8321                 root->fs_info->excluded_extents = NULL;
8322         }
8323         free(bits);
8324         free_chunk_cache_tree(&chunk_cache);
8325         free_device_cache_tree(&dev_cache);
8326         free_block_group_tree(&block_group_cache);
8327         free_device_extent_tree(&dev_extent_cache);
8328         free_extent_cache_tree(&seen);
8329         free_extent_cache_tree(&pending);
8330         free_extent_cache_tree(&reada);
8331         free_extent_cache_tree(&nodes);
8332         return ret;
8333 loop:
8334         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8335         free_extent_cache_tree(&seen);
8336         free_extent_cache_tree(&pending);
8337         free_extent_cache_tree(&reada);
8338         free_extent_cache_tree(&nodes);
8339         free_chunk_cache_tree(&chunk_cache);
8340         free_block_group_tree(&block_group_cache);
8341         free_device_cache_tree(&dev_cache);
8342         free_device_extent_tree(&dev_extent_cache);
8343         free_extent_record_cache(root->fs_info, &extent_cache);
8344         free_root_item_list(&normal_trees);
8345         free_root_item_list(&dropping_trees);
8346         extent_io_tree_cleanup(&excluded_extents);
8347         goto again;
8348 }
8349
8350 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8351                            struct btrfs_root *root, int overwrite)
8352 {
8353         struct extent_buffer *c;
8354         struct extent_buffer *old = root->node;
8355         int level;
8356         int ret;
8357         struct btrfs_disk_key disk_key = {0,0,0};
8358
8359         level = 0;
8360
8361         if (overwrite) {
8362                 c = old;
8363                 extent_buffer_get(c);
8364                 goto init;
8365         }
8366         c = btrfs_alloc_free_block(trans, root,
8367                                    btrfs_level_size(root, 0),
8368                                    root->root_key.objectid,
8369                                    &disk_key, level, 0, 0);
8370         if (IS_ERR(c)) {
8371                 c = old;
8372                 extent_buffer_get(c);
8373                 overwrite = 1;
8374         }
8375 init:
8376         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8377         btrfs_set_header_level(c, level);
8378         btrfs_set_header_bytenr(c, c->start);
8379         btrfs_set_header_generation(c, trans->transid);
8380         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8381         btrfs_set_header_owner(c, root->root_key.objectid);
8382
8383         write_extent_buffer(c, root->fs_info->fsid,
8384                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8385
8386         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8387                             btrfs_header_chunk_tree_uuid(c),
8388                             BTRFS_UUID_SIZE);
8389
8390         btrfs_mark_buffer_dirty(c);
8391         /*
8392          * this case can happen in the following case:
8393          *
8394          * 1.overwrite previous root.
8395          *
8396          * 2.reinit reloc data root, this is because we skip pin
8397          * down reloc data tree before which means we can allocate
8398          * same block bytenr here.
8399          */
8400         if (old->start == c->start) {
8401                 btrfs_set_root_generation(&root->root_item,
8402                                           trans->transid);
8403                 root->root_item.level = btrfs_header_level(root->node);
8404                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8405                                         &root->root_key, &root->root_item);
8406                 if (ret) {
8407                         free_extent_buffer(c);
8408                         return ret;
8409                 }
8410         }
8411         free_extent_buffer(old);
8412         root->node = c;
8413         add_root_to_dirty_list(root);
8414         return 0;
8415 }
8416
8417 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8418                                 struct extent_buffer *eb, int tree_root)
8419 {
8420         struct extent_buffer *tmp;
8421         struct btrfs_root_item *ri;
8422         struct btrfs_key key;
8423         u64 bytenr;
8424         u32 leafsize;
8425         int level = btrfs_header_level(eb);
8426         int nritems;
8427         int ret;
8428         int i;
8429
8430         /*
8431          * If we have pinned this block before, don't pin it again.
8432          * This can not only avoid forever loop with broken filesystem
8433          * but also give us some speedups.
8434          */
8435         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8436                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8437                 return 0;
8438
8439         btrfs_pin_extent(fs_info, eb->start, eb->len);
8440
8441         leafsize = btrfs_super_leafsize(fs_info->super_copy);
8442         nritems = btrfs_header_nritems(eb);
8443         for (i = 0; i < nritems; i++) {
8444                 if (level == 0) {
8445                         btrfs_item_key_to_cpu(eb, &key, i);
8446                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8447                                 continue;
8448                         /* Skip the extent root and reloc roots */
8449                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8450                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8451                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8452                                 continue;
8453                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8454                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8455
8456                         /*
8457                          * If at any point we start needing the real root we
8458                          * will have to build a stump root for the root we are
8459                          * in, but for now this doesn't actually use the root so
8460                          * just pass in extent_root.
8461                          */
8462                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8463                                               leafsize, 0);
8464                         if (!extent_buffer_uptodate(tmp)) {
8465                                 fprintf(stderr, "Error reading root block\n");
8466                                 return -EIO;
8467                         }
8468                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8469                         free_extent_buffer(tmp);
8470                         if (ret)
8471                                 return ret;
8472                 } else {
8473                         bytenr = btrfs_node_blockptr(eb, i);
8474
8475                         /* If we aren't the tree root don't read the block */
8476                         if (level == 1 && !tree_root) {
8477                                 btrfs_pin_extent(fs_info, bytenr, leafsize);
8478                                 continue;
8479                         }
8480
8481                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8482                                               leafsize, 0);
8483                         if (!extent_buffer_uptodate(tmp)) {
8484                                 fprintf(stderr, "Error reading tree block\n");
8485                                 return -EIO;
8486                         }
8487                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8488                         free_extent_buffer(tmp);
8489                         if (ret)
8490                                 return ret;
8491                 }
8492         }
8493
8494         return 0;
8495 }
8496
8497 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8498 {
8499         int ret;
8500
8501         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8502         if (ret)
8503                 return ret;
8504
8505         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8506 }
8507
8508 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8509 {
8510         struct btrfs_block_group_cache *cache;
8511         struct btrfs_path *path;
8512         struct extent_buffer *leaf;
8513         struct btrfs_chunk *chunk;
8514         struct btrfs_key key;
8515         int ret;
8516         u64 start;
8517
8518         path = btrfs_alloc_path();
8519         if (!path)
8520                 return -ENOMEM;
8521
8522         key.objectid = 0;
8523         key.type = BTRFS_CHUNK_ITEM_KEY;
8524         key.offset = 0;
8525
8526         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8527         if (ret < 0) {
8528                 btrfs_free_path(path);
8529                 return ret;
8530         }
8531
8532         /*
8533          * We do this in case the block groups were screwed up and had alloc
8534          * bits that aren't actually set on the chunks.  This happens with
8535          * restored images every time and could happen in real life I guess.
8536          */
8537         fs_info->avail_data_alloc_bits = 0;
8538         fs_info->avail_metadata_alloc_bits = 0;
8539         fs_info->avail_system_alloc_bits = 0;
8540
8541         /* First we need to create the in-memory block groups */
8542         while (1) {
8543                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8544                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
8545                         if (ret < 0) {
8546                                 btrfs_free_path(path);
8547                                 return ret;
8548                         }
8549                         if (ret) {
8550                                 ret = 0;
8551                                 break;
8552                         }
8553                 }
8554                 leaf = path->nodes[0];
8555                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8556                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8557                         path->slots[0]++;
8558                         continue;
8559                 }
8560
8561                 chunk = btrfs_item_ptr(leaf, path->slots[0],
8562                                        struct btrfs_chunk);
8563                 btrfs_add_block_group(fs_info, 0,
8564                                       btrfs_chunk_type(leaf, chunk),
8565                                       key.objectid, key.offset,
8566                                       btrfs_chunk_length(leaf, chunk));
8567                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8568                                  key.offset + btrfs_chunk_length(leaf, chunk),
8569                                  GFP_NOFS);
8570                 path->slots[0]++;
8571         }
8572         start = 0;
8573         while (1) {
8574                 cache = btrfs_lookup_first_block_group(fs_info, start);
8575                 if (!cache)
8576                         break;
8577                 cache->cached = 1;
8578                 start = cache->key.objectid + cache->key.offset;
8579         }
8580
8581         btrfs_free_path(path);
8582         return 0;
8583 }
8584
8585 static int reset_balance(struct btrfs_trans_handle *trans,
8586                          struct btrfs_fs_info *fs_info)
8587 {
8588         struct btrfs_root *root = fs_info->tree_root;
8589         struct btrfs_path *path;
8590         struct extent_buffer *leaf;
8591         struct btrfs_key key;
8592         int del_slot, del_nr = 0;
8593         int ret;
8594         int found = 0;
8595
8596         path = btrfs_alloc_path();
8597         if (!path)
8598                 return -ENOMEM;
8599
8600         key.objectid = BTRFS_BALANCE_OBJECTID;
8601         key.type = BTRFS_BALANCE_ITEM_KEY;
8602         key.offset = 0;
8603
8604         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8605         if (ret) {
8606                 if (ret > 0)
8607                         ret = 0;
8608                 if (!ret)
8609                         goto reinit_data_reloc;
8610                 else
8611                         goto out;
8612         }
8613
8614         ret = btrfs_del_item(trans, root, path);
8615         if (ret)
8616                 goto out;
8617         btrfs_release_path(path);
8618
8619         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8620         key.type = BTRFS_ROOT_ITEM_KEY;
8621         key.offset = 0;
8622
8623         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8624         if (ret < 0)
8625                 goto out;
8626         while (1) {
8627                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8628                         if (!found)
8629                                 break;
8630
8631                         if (del_nr) {
8632                                 ret = btrfs_del_items(trans, root, path,
8633                                                       del_slot, del_nr);
8634                                 del_nr = 0;
8635                                 if (ret)
8636                                         goto out;
8637                         }
8638                         key.offset++;
8639                         btrfs_release_path(path);
8640
8641                         found = 0;
8642                         ret = btrfs_search_slot(trans, root, &key, path,
8643                                                 -1, 1);
8644                         if (ret < 0)
8645                                 goto out;
8646                         continue;
8647                 }
8648                 found = 1;
8649                 leaf = path->nodes[0];
8650                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8651                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8652                         break;
8653                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8654                         path->slots[0]++;
8655                         continue;
8656                 }
8657                 if (!del_nr) {
8658                         del_slot = path->slots[0];
8659                         del_nr = 1;
8660                 } else {
8661                         del_nr++;
8662                 }
8663                 path->slots[0]++;
8664         }
8665
8666         if (del_nr) {
8667                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8668                 if (ret)
8669                         goto out;
8670         }
8671         btrfs_release_path(path);
8672
8673 reinit_data_reloc:
8674         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8675         key.type = BTRFS_ROOT_ITEM_KEY;
8676         key.offset = (u64)-1;
8677         root = btrfs_read_fs_root(fs_info, &key);
8678         if (IS_ERR(root)) {
8679                 fprintf(stderr, "Error reading data reloc tree\n");
8680                 ret = PTR_ERR(root);
8681                 goto out;
8682         }
8683         record_root_in_trans(trans, root);
8684         ret = btrfs_fsck_reinit_root(trans, root, 0);
8685         if (ret)
8686                 goto out;
8687         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8688 out:
8689         btrfs_free_path(path);
8690         return ret;
8691 }
8692
8693 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8694                               struct btrfs_fs_info *fs_info)
8695 {
8696         u64 start = 0;
8697         int ret;
8698
8699         /*
8700          * The only reason we don't do this is because right now we're just
8701          * walking the trees we find and pinning down their bytes, we don't look
8702          * at any of the leaves.  In order to do mixed groups we'd have to check
8703          * the leaves of any fs roots and pin down the bytes for any file
8704          * extents we find.  Not hard but why do it if we don't have to?
8705          */
8706         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8707                 fprintf(stderr, "We don't support re-initing the extent tree "
8708                         "for mixed block groups yet, please notify a btrfs "
8709                         "developer you want to do this so they can add this "
8710                         "functionality.\n");
8711                 return -EINVAL;
8712         }
8713
8714         /*
8715          * first we need to walk all of the trees except the extent tree and pin
8716          * down the bytes that are in use so we don't overwrite any existing
8717          * metadata.
8718          */
8719         ret = pin_metadata_blocks(fs_info);
8720         if (ret) {
8721                 fprintf(stderr, "error pinning down used bytes\n");
8722                 return ret;
8723         }
8724
8725         /*
8726          * Need to drop all the block groups since we're going to recreate all
8727          * of them again.
8728          */
8729         btrfs_free_block_groups(fs_info);
8730         ret = reset_block_groups(fs_info);
8731         if (ret) {
8732                 fprintf(stderr, "error resetting the block groups\n");
8733                 return ret;
8734         }
8735
8736         /* Ok we can allocate now, reinit the extent root */
8737         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8738         if (ret) {
8739                 fprintf(stderr, "extent root initialization failed\n");
8740                 /*
8741                  * When the transaction code is updated we should end the
8742                  * transaction, but for now progs only knows about commit so
8743                  * just return an error.
8744                  */
8745                 return ret;
8746         }
8747
8748         /*
8749          * Now we have all the in-memory block groups setup so we can make
8750          * allocations properly, and the metadata we care about is safe since we
8751          * pinned all of it above.
8752          */
8753         while (1) {
8754                 struct btrfs_block_group_cache *cache;
8755
8756                 cache = btrfs_lookup_first_block_group(fs_info, start);
8757                 if (!cache)
8758                         break;
8759                 start = cache->key.objectid + cache->key.offset;
8760                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8761                                         &cache->key, &cache->item,
8762                                         sizeof(cache->item));
8763                 if (ret) {
8764                         fprintf(stderr, "Error adding block group\n");
8765                         return ret;
8766                 }
8767                 btrfs_extent_post_op(trans, fs_info->extent_root);
8768         }
8769
8770         ret = reset_balance(trans, fs_info);
8771         if (ret)
8772                 fprintf(stderr, "error reseting the pending balance\n");
8773
8774         return ret;
8775 }
8776
8777 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8778 {
8779         struct btrfs_path *path;
8780         struct btrfs_trans_handle *trans;
8781         struct btrfs_key key;
8782         int ret;
8783
8784         printf("Recowing metadata block %llu\n", eb->start);
8785         key.objectid = btrfs_header_owner(eb);
8786         key.type = BTRFS_ROOT_ITEM_KEY;
8787         key.offset = (u64)-1;
8788
8789         root = btrfs_read_fs_root(root->fs_info, &key);
8790         if (IS_ERR(root)) {
8791                 fprintf(stderr, "Couldn't find owner root %llu\n",
8792                         key.objectid);
8793                 return PTR_ERR(root);
8794         }
8795
8796         path = btrfs_alloc_path();
8797         if (!path)
8798                 return -ENOMEM;
8799
8800         trans = btrfs_start_transaction(root, 1);
8801         if (IS_ERR(trans)) {
8802                 btrfs_free_path(path);
8803                 return PTR_ERR(trans);
8804         }
8805
8806         path->lowest_level = btrfs_header_level(eb);
8807         if (path->lowest_level)
8808                 btrfs_node_key_to_cpu(eb, &key, 0);
8809         else
8810                 btrfs_item_key_to_cpu(eb, &key, 0);
8811
8812         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8813         btrfs_commit_transaction(trans, root);
8814         btrfs_free_path(path);
8815         return ret;
8816 }
8817
8818 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8819 {
8820         struct btrfs_path *path;
8821         struct btrfs_trans_handle *trans;
8822         struct btrfs_key key;
8823         int ret;
8824
8825         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8826                bad->key.type, bad->key.offset);
8827         key.objectid = bad->root_id;
8828         key.type = BTRFS_ROOT_ITEM_KEY;
8829         key.offset = (u64)-1;
8830
8831         root = btrfs_read_fs_root(root->fs_info, &key);
8832         if (IS_ERR(root)) {
8833                 fprintf(stderr, "Couldn't find owner root %llu\n",
8834                         key.objectid);
8835                 return PTR_ERR(root);
8836         }
8837
8838         path = btrfs_alloc_path();
8839         if (!path)
8840                 return -ENOMEM;
8841
8842         trans = btrfs_start_transaction(root, 1);
8843         if (IS_ERR(trans)) {
8844                 btrfs_free_path(path);
8845                 return PTR_ERR(trans);
8846         }
8847
8848         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8849         if (ret) {
8850                 if (ret > 0)
8851                         ret = 0;
8852                 goto out;
8853         }
8854         ret = btrfs_del_item(trans, root, path);
8855 out:
8856         btrfs_commit_transaction(trans, root);
8857         btrfs_free_path(path);
8858         return ret;
8859 }
8860
8861 static int zero_log_tree(struct btrfs_root *root)
8862 {
8863         struct btrfs_trans_handle *trans;
8864         int ret;
8865
8866         trans = btrfs_start_transaction(root, 1);
8867         if (IS_ERR(trans)) {
8868                 ret = PTR_ERR(trans);
8869                 return ret;
8870         }
8871         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8872         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8873         ret = btrfs_commit_transaction(trans, root);
8874         return ret;
8875 }
8876
8877 static int populate_csum(struct btrfs_trans_handle *trans,
8878                          struct btrfs_root *csum_root, char *buf, u64 start,
8879                          u64 len)
8880 {
8881         u64 offset = 0;
8882         u64 sectorsize;
8883         int ret = 0;
8884
8885         while (offset < len) {
8886                 sectorsize = csum_root->sectorsize;
8887                 ret = read_extent_data(csum_root, buf, start + offset,
8888                                        &sectorsize, 0);
8889                 if (ret)
8890                         break;
8891                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8892                                             start + offset, buf, sectorsize);
8893                 if (ret)
8894                         break;
8895                 offset += sectorsize;
8896         }
8897         return ret;
8898 }
8899
8900 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8901                                       struct btrfs_root *csum_root,
8902                                       struct btrfs_root *cur_root)
8903 {
8904         struct btrfs_path *path;
8905         struct btrfs_key key;
8906         struct extent_buffer *node;
8907         struct btrfs_file_extent_item *fi;
8908         char *buf = NULL;
8909         u64 start = 0;
8910         u64 len = 0;
8911         int slot = 0;
8912         int ret = 0;
8913
8914         path = btrfs_alloc_path();
8915         if (!path)
8916                 return -ENOMEM;
8917         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8918         if (!buf) {
8919                 ret = -ENOMEM;
8920                 goto out;
8921         }
8922
8923         key.objectid = 0;
8924         key.offset = 0;
8925         key.type = 0;
8926
8927         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8928         if (ret < 0)
8929                 goto out;
8930         /* Iterate all regular file extents and fill its csum */
8931         while (1) {
8932                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8933
8934                 if (key.type != BTRFS_EXTENT_DATA_KEY)
8935                         goto next;
8936                 node = path->nodes[0];
8937                 slot = path->slots[0];
8938                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8939                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8940                         goto next;
8941                 start = btrfs_file_extent_disk_bytenr(node, fi);
8942                 len = btrfs_file_extent_disk_num_bytes(node, fi);
8943
8944                 ret = populate_csum(trans, csum_root, buf, start, len);
8945                 if (ret == -EEXIST)
8946                         ret = 0;
8947                 if (ret < 0)
8948                         goto out;
8949 next:
8950                 /*
8951                  * TODO: if next leaf is corrupted, jump to nearest next valid
8952                  * leaf.
8953                  */
8954                 ret = btrfs_next_item(cur_root, path);
8955                 if (ret < 0)
8956                         goto out;
8957                 if (ret > 0) {
8958                         ret = 0;
8959                         goto out;
8960                 }
8961         }
8962
8963 out:
8964         btrfs_free_path(path);
8965         free(buf);
8966         return ret;
8967 }
8968
8969 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8970                                   struct btrfs_root *csum_root)
8971 {
8972         struct btrfs_fs_info *fs_info = csum_root->fs_info;
8973         struct btrfs_path *path;
8974         struct btrfs_root *tree_root = fs_info->tree_root;
8975         struct btrfs_root *cur_root;
8976         struct extent_buffer *node;
8977         struct btrfs_key key;
8978         int slot = 0;
8979         int ret = 0;
8980
8981         path = btrfs_alloc_path();
8982         if (!path)
8983                 return -ENOMEM;
8984
8985         key.objectid = BTRFS_FS_TREE_OBJECTID;
8986         key.offset = 0;
8987         key.type = BTRFS_ROOT_ITEM_KEY;
8988
8989         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
8990         if (ret < 0)
8991                 goto out;
8992         if (ret > 0) {
8993                 ret = -ENOENT;
8994                 goto out;
8995         }
8996
8997         while (1) {
8998                 node = path->nodes[0];
8999                 slot = path->slots[0];
9000                 btrfs_item_key_to_cpu(node, &key, slot);
9001                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
9002                         goto out;
9003                 if (key.type != BTRFS_ROOT_ITEM_KEY)
9004                         goto next;
9005                 if (!is_fstree(key.objectid))
9006                         goto next;
9007                 key.offset = (u64)-1;
9008
9009                 cur_root = btrfs_read_fs_root(fs_info, &key);
9010                 if (IS_ERR(cur_root) || !cur_root) {
9011                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
9012                                 key.objectid);
9013                         goto out;
9014                 }
9015                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
9016                                 cur_root);
9017                 if (ret < 0)
9018                         goto out;
9019 next:
9020                 ret = btrfs_next_item(tree_root, path);
9021                 if (ret > 0) {
9022                         ret = 0;
9023                         goto out;
9024                 }
9025                 if (ret < 0)
9026                         goto out;
9027         }
9028
9029 out:
9030         btrfs_free_path(path);
9031         return ret;
9032 }
9033
9034 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
9035                                       struct btrfs_root *csum_root)
9036 {
9037         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
9038         struct btrfs_path *path;
9039         struct btrfs_extent_item *ei;
9040         struct extent_buffer *leaf;
9041         char *buf;
9042         struct btrfs_key key;
9043         int ret;
9044
9045         path = btrfs_alloc_path();
9046         if (!path)
9047                 return -ENOMEM;
9048
9049         key.objectid = 0;
9050         key.type = BTRFS_EXTENT_ITEM_KEY;
9051         key.offset = 0;
9052
9053         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9054         if (ret < 0) {
9055                 btrfs_free_path(path);
9056                 return ret;
9057         }
9058
9059         buf = malloc(csum_root->sectorsize);
9060         if (!buf) {
9061                 btrfs_free_path(path);
9062                 return -ENOMEM;
9063         }
9064
9065         while (1) {
9066                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9067                         ret = btrfs_next_leaf(extent_root, path);
9068                         if (ret < 0)
9069                                 break;
9070                         if (ret) {
9071                                 ret = 0;
9072                                 break;
9073                         }
9074                 }
9075                 leaf = path->nodes[0];
9076
9077                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9078                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9079                         path->slots[0]++;
9080                         continue;
9081                 }
9082
9083                 ei = btrfs_item_ptr(leaf, path->slots[0],
9084                                     struct btrfs_extent_item);
9085                 if (!(btrfs_extent_flags(leaf, ei) &
9086                       BTRFS_EXTENT_FLAG_DATA)) {
9087                         path->slots[0]++;
9088                         continue;
9089                 }
9090
9091                 ret = populate_csum(trans, csum_root, buf, key.objectid,
9092                                     key.offset);
9093                 if (ret)
9094                         break;
9095                 path->slots[0]++;
9096         }
9097
9098         btrfs_free_path(path);
9099         free(buf);
9100         return ret;
9101 }
9102
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * Re-initializing the extent tree wipes all extent info, so after that the
 * extent tree cannot be used to locate data extents.  In that case the caller
 * sets search_fs_tree and the fs/subvolume trees are walked instead;
 * otherwise the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);

	return fill_csum_tree_from_fs(trans, csum_root);
}
9119
9120 struct root_item_info {
9121         /* level of the root */
9122         u8 level;
9123         /* number of nodes at this level, must be 1 for a root */
9124         int node_count;
9125         u64 bytenr;
9126         u64 gen;
9127         struct cache_extent cache_extent;
9128 };
9129
9130 static struct cache_tree *roots_info_cache = NULL;
9131
9132 static void free_roots_info_cache(void)
9133 {
9134         if (!roots_info_cache)
9135                 return;
9136
9137         while (!cache_tree_empty(roots_info_cache)) {
9138                 struct cache_extent *entry;
9139                 struct root_item_info *rii;
9140
9141                 entry = first_cache_extent(roots_info_cache);
9142                 if (!entry)
9143                         break;
9144                 remove_cache_extent(roots_info_cache, entry);
9145                 rii = container_of(entry, struct root_item_info, cache_extent);
9146                 free(rii);
9147         }
9148
9149         free(roots_info_cache);
9150         roots_info_cache = NULL;
9151 }
9152
9153 static int build_roots_info_cache(struct btrfs_fs_info *info)
9154 {
9155         int ret = 0;
9156         struct btrfs_key key;
9157         struct extent_buffer *leaf;
9158         struct btrfs_path *path;
9159
9160         if (!roots_info_cache) {
9161                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9162                 if (!roots_info_cache)
9163                         return -ENOMEM;
9164                 cache_tree_init(roots_info_cache);
9165         }
9166
9167         path = btrfs_alloc_path();
9168         if (!path)
9169                 return -ENOMEM;
9170
9171         key.objectid = 0;
9172         key.type = BTRFS_EXTENT_ITEM_KEY;
9173         key.offset = 0;
9174
9175         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9176         if (ret < 0)
9177                 goto out;
9178         leaf = path->nodes[0];
9179
9180         while (1) {
9181                 struct btrfs_key found_key;
9182                 struct btrfs_extent_item *ei;
9183                 struct btrfs_extent_inline_ref *iref;
9184                 int slot = path->slots[0];
9185                 int type;
9186                 u64 flags;
9187                 u64 root_id;
9188                 u8 level;
9189                 struct cache_extent *entry;
9190                 struct root_item_info *rii;
9191
9192                 if (slot >= btrfs_header_nritems(leaf)) {
9193                         ret = btrfs_next_leaf(info->extent_root, path);
9194                         if (ret < 0) {
9195                                 break;
9196                         } else if (ret) {
9197                                 ret = 0;
9198                                 break;
9199                         }
9200                         leaf = path->nodes[0];
9201                         slot = path->slots[0];
9202                 }
9203
9204                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9205
9206                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9207                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9208                         goto next;
9209
9210                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9211                 flags = btrfs_extent_flags(leaf, ei);
9212
9213                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9214                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9215                         goto next;
9216
9217                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9218                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9219                         level = found_key.offset;
9220                 } else {
9221                         struct btrfs_tree_block_info *binfo;
9222
9223                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
9224                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
9225                         level = btrfs_tree_block_level(leaf, binfo);
9226                 }
9227
9228                 /*
9229                  * For a root extent, it must be of the following type and the
9230                  * first (and only one) iref in the item.
9231                  */
9232                 type = btrfs_extent_inline_ref_type(leaf, iref);
9233                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9234                         goto next;
9235
9236                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9237                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9238                 if (!entry) {
9239                         rii = malloc(sizeof(struct root_item_info));
9240                         if (!rii) {
9241                                 ret = -ENOMEM;
9242                                 goto out;
9243                         }
9244                         rii->cache_extent.start = root_id;
9245                         rii->cache_extent.size = 1;
9246                         rii->level = (u8)-1;
9247                         entry = &rii->cache_extent;
9248                         ret = insert_cache_extent(roots_info_cache, entry);
9249                         ASSERT(ret == 0);
9250                 } else {
9251                         rii = container_of(entry, struct root_item_info,
9252                                            cache_extent);
9253                 }
9254
9255                 ASSERT(rii->cache_extent.start == root_id);
9256                 ASSERT(rii->cache_extent.size == 1);
9257
9258                 if (level > rii->level || rii->level == (u8)-1) {
9259                         rii->level = level;
9260                         rii->bytenr = found_key.objectid;
9261                         rii->gen = btrfs_extent_generation(leaf, ei);
9262                         rii->node_count = 1;
9263                 } else if (level == rii->level) {
9264                         rii->node_count++;
9265                 }
9266 next:
9267                 path->slots[0]++;
9268         }
9269
9270 out:
9271         btrfs_free_path(path);
9272
9273         return ret;
9274 }
9275
9276 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9277                                   struct btrfs_path *path,
9278                                   const struct btrfs_key *root_key,
9279                                   const int read_only_mode)
9280 {
9281         const u64 root_id = root_key->objectid;
9282         struct cache_extent *entry;
9283         struct root_item_info *rii;
9284         struct btrfs_root_item ri;
9285         unsigned long offset;
9286
9287         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9288         if (!entry) {
9289                 fprintf(stderr,
9290                         "Error: could not find extent items for root %llu\n",
9291                         root_key->objectid);
9292                 return -ENOENT;
9293         }
9294
9295         rii = container_of(entry, struct root_item_info, cache_extent);
9296         ASSERT(rii->cache_extent.start == root_id);
9297         ASSERT(rii->cache_extent.size == 1);
9298
9299         if (rii->node_count != 1) {
9300                 fprintf(stderr,
9301                         "Error: could not find btree root extent for root %llu\n",
9302                         root_id);
9303                 return -ENOENT;
9304         }
9305
9306         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9307         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9308
9309         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9310             btrfs_root_level(&ri) != rii->level ||
9311             btrfs_root_generation(&ri) != rii->gen) {
9312
9313                 /*
9314                  * If we're in repair mode but our caller told us to not update
9315                  * the root item, i.e. just check if it needs to be updated, don't
9316                  * print this message, since the caller will call us again shortly
9317                  * for the same root item without read only mode (the caller will
9318                  * open a transaction first).
9319                  */
9320                 if (!(read_only_mode && repair))
9321                         fprintf(stderr,
9322                                 "%sroot item for root %llu,"
9323                                 " current bytenr %llu, current gen %llu, current level %u,"
9324                                 " new bytenr %llu, new gen %llu, new level %u\n",
9325                                 (read_only_mode ? "" : "fixing "),
9326                                 root_id,
9327                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9328                                 btrfs_root_level(&ri),
9329                                 rii->bytenr, rii->gen, rii->level);
9330
9331                 if (btrfs_root_generation(&ri) > rii->gen) {
9332                         fprintf(stderr,
9333                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9334                                 root_id, btrfs_root_generation(&ri), rii->gen);
9335                         return -EINVAL;
9336                 }
9337
9338                 if (!read_only_mode) {
9339                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9340                         btrfs_set_root_level(&ri, rii->level);
9341                         btrfs_set_root_generation(&ri, rii->gen);
9342                         write_extent_buffer(path->nodes[0], &ri,
9343                                             offset, sizeof(ri));
9344                 }
9345
9346                 return 1;
9347         }
9348
9349         return 0;
9350 }
9351
9352 /*
9353  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9354  * caused read-only snapshots to be corrupted if they were created at a moment
9355  * when the source subvolume/snapshot had orphan items. The issue was that the
9356  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9357  * node instead of the post orphan cleanup root node.
9358  * So this function, and its callees, just detects and fixes those cases. Even
9359  * though the regression was for read-only snapshots, this function applies to
9360  * any snapshot/subvolume root.
9361  * This must be run before any other repair code - not doing it so, makes other
9362  * repair code delete or modify backrefs in the extent tree for example, which
9363  * will result in an inconsistent fs after repairing the root items.
9364  */
9365 static int repair_root_items(struct btrfs_fs_info *info)
9366 {
9367         struct btrfs_path *path = NULL;
9368         struct btrfs_key key;
9369         struct extent_buffer *leaf;
9370         struct btrfs_trans_handle *trans = NULL;
9371         int ret = 0;
9372         int bad_roots = 0;
9373         int need_trans = 0;
9374
9375         ret = build_roots_info_cache(info);
9376         if (ret)
9377                 goto out;
9378
9379         path = btrfs_alloc_path();
9380         if (!path) {
9381                 ret = -ENOMEM;
9382                 goto out;
9383         }
9384
9385         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9386         key.type = BTRFS_ROOT_ITEM_KEY;
9387         key.offset = 0;
9388
9389 again:
9390         /*
9391          * Avoid opening and committing transactions if a leaf doesn't have
9392          * any root items that need to be fixed, so that we avoid rotating
9393          * backup roots unnecessarily.
9394          */
9395         if (need_trans) {
9396                 trans = btrfs_start_transaction(info->tree_root, 1);
9397                 if (IS_ERR(trans)) {
9398                         ret = PTR_ERR(trans);
9399                         goto out;
9400                 }
9401         }
9402
9403         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9404                                 0, trans ? 1 : 0);
9405         if (ret < 0)
9406                 goto out;
9407         leaf = path->nodes[0];
9408
9409         while (1) {
9410                 struct btrfs_key found_key;
9411
9412                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9413                         int no_more_keys = find_next_key(path, &key);
9414
9415                         btrfs_release_path(path);
9416                         if (trans) {
9417                                 ret = btrfs_commit_transaction(trans,
9418                                                                info->tree_root);
9419                                 trans = NULL;
9420                                 if (ret < 0)
9421                                         goto out;
9422                         }
9423                         need_trans = 0;
9424                         if (no_more_keys)
9425                                 break;
9426                         goto again;
9427                 }
9428
9429                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9430
9431                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9432                         goto next;
9433                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9434                         goto next;
9435
9436                 ret = maybe_repair_root_item(info, path, &found_key,
9437                                              trans ? 0 : 1);
9438                 if (ret < 0)
9439                         goto out;
9440                 if (ret) {
9441                         if (!trans && repair) {
9442                                 need_trans = 1;
9443                                 key = found_key;
9444                                 btrfs_release_path(path);
9445                                 goto again;
9446                         }
9447                         bad_roots++;
9448                 }
9449 next:
9450                 path->slots[0]++;
9451         }
9452         ret = 0;
9453 out:
9454         free_roots_info_cache();
9455         btrfs_free_path(path);
9456         if (trans)
9457                 btrfs_commit_transaction(trans, info->tree_root);
9458         if (ret < 0)
9459                 return ret;
9460
9461         return bad_roots;
9462 }
9463
/*
 * Help text for "btrfs check".  The array is NULL-terminated and is what
 * cmd_check() hands to usage().
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found.",
	"WARNING: the repair mode is considered dangerous",
	"",
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	NULL
};
9487
9488 int cmd_check(int argc, char **argv)
9489 {
9490         struct cache_tree root_cache;
9491         struct btrfs_root *root;
9492         struct btrfs_fs_info *info;
9493         u64 bytenr = 0;
9494         u64 subvolid = 0;
9495         u64 tree_root_bytenr = 0;
9496         u64 chunk_root_bytenr = 0;
9497         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9498         int ret;
9499         u64 num;
9500         int init_csum_tree = 0;
9501         int readonly = 0;
9502         int qgroup_report = 0;
9503         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9504
9505         while(1) {
9506                 int c;
9507                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
9508                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
9509                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE };
9510                 static const struct option long_options[] = {
9511                         { "super", required_argument, NULL, 's' },
9512                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
9513                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
9514                         { "init-csum-tree", no_argument, NULL,
9515                                 GETOPT_VAL_INIT_CSUM },
9516                         { "init-extent-tree", no_argument, NULL,
9517                                 GETOPT_VAL_INIT_EXTENT },
9518                         { "check-data-csum", no_argument, NULL,
9519                                 GETOPT_VAL_CHECK_CSUM },
9520                         { "backup", no_argument, NULL, 'b' },
9521                         { "subvol-extents", required_argument, NULL, 'E' },
9522                         { "qgroup-report", no_argument, NULL, 'Q' },
9523                         { "tree-root", required_argument, NULL, 'r' },
9524                         { "chunk-root", required_argument, NULL,
9525                                 GETOPT_VAL_CHUNK_TREE },
9526                         { "progress", no_argument, NULL, 'p' },
9527                         { NULL, 0, NULL, 0}
9528                 };
9529
9530                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9531                 if (c < 0)
9532                         break;
9533                 switch(c) {
9534                         case 'a': /* ignored */ break;
9535                         case 'b':
9536                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9537                                 break;
9538                         case 's':
9539                                 num = arg_strtou64(optarg);
9540                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9541                                         fprintf(stderr,
9542                                                 "ERROR: super mirror should be less than: %d\n",
9543                                                 BTRFS_SUPER_MIRROR_MAX);
9544                                         exit(1);
9545                                 }
9546                                 bytenr = btrfs_sb_offset(((int)num));
9547                                 printf("using SB copy %llu, bytenr %llu\n", num,
9548                                        (unsigned long long)bytenr);
9549                                 break;
9550                         case 'Q':
9551                                 qgroup_report = 1;
9552                                 break;
9553                         case 'E':
9554                                 subvolid = arg_strtou64(optarg);
9555                                 break;
9556                         case 'r':
9557                                 tree_root_bytenr = arg_strtou64(optarg);
9558                                 break;
9559                         case GETOPT_VAL_CHUNK_TREE:
9560                                 chunk_root_bytenr = arg_strtou64(optarg);
9561                                 break;
9562                         case 'p':
9563                                 ctx.progress_enabled = true;
9564                                 break;
9565                         case '?':
9566                         case 'h':
9567                                 usage(cmd_check_usage);
9568                         case GETOPT_VAL_REPAIR:
9569                                 printf("enabling repair mode\n");
9570                                 repair = 1;
9571                                 ctree_flags |= OPEN_CTREE_WRITES;
9572                                 break;
9573                         case GETOPT_VAL_READONLY:
9574                                 readonly = 1;
9575                                 break;
9576                         case GETOPT_VAL_INIT_CSUM:
9577                                 printf("Creating a new CRC tree\n");
9578                                 init_csum_tree = 1;
9579                                 repair = 1;
9580                                 ctree_flags |= OPEN_CTREE_WRITES;
9581                                 break;
9582                         case GETOPT_VAL_INIT_EXTENT:
9583                                 init_extent_tree = 1;
9584                                 ctree_flags |= (OPEN_CTREE_WRITES |
9585                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9586                                 repair = 1;
9587                                 break;
9588                         case GETOPT_VAL_CHECK_CSUM:
9589                                 check_data_csum = 1;
9590                                 break;
9591                 }
9592         }
9593
9594         if (check_argc_exact(argc - optind, 1))
9595                 usage(cmd_check_usage);
9596
9597         if (ctx.progress_enabled) {
9598                 ctx.tp = TASK_NOTHING;
9599                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9600         }
9601
9602         /* This check is the only reason for --readonly to exist */
9603         if (readonly && repair) {
9604                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9605                 exit(1);
9606         }
9607
9608         radix_tree_init();
9609         cache_tree_init(&root_cache);
9610
9611         if((ret = check_mounted(argv[optind])) < 0) {
9612                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9613                 goto err_out;
9614         } else if(ret) {
9615                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9616                 ret = -EBUSY;
9617                 goto err_out;
9618         }
9619
9620         /* only allow partial opening under repair mode */
9621         if (repair)
9622                 ctree_flags |= OPEN_CTREE_PARTIAL;
9623
9624         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9625                                   chunk_root_bytenr, ctree_flags);
9626         if (!info) {
9627                 fprintf(stderr, "Couldn't open file system\n");
9628                 ret = -EIO;
9629                 goto err_out;
9630         }
9631
9632         global_info = info;
9633         root = info->fs_root;
9634
9635         /*
9636          * repair mode will force us to commit transaction which
9637          * will make us fail to load log tree when mounting.
9638          */
9639         if (repair && btrfs_super_log_root(info->super_copy)) {
9640                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9641                 if (!ret) {
9642                         ret = 1;
9643                         goto close_out;
9644                 }
9645                 ret = zero_log_tree(root);
9646                 if (ret) {
9647                         fprintf(stderr, "fail to zero log tree\n");
9648                         goto close_out;
9649                 }
9650         }
9651
9652         uuid_unparse(info->super_copy->fsid, uuidbuf);
9653         if (qgroup_report) {
9654                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9655                        uuidbuf);
9656                 ret = qgroup_verify_all(info);
9657                 if (ret == 0)
9658                         print_qgroup_report(1);
9659                 goto close_out;
9660         }
9661         if (subvolid) {
9662                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9663                        subvolid, argv[optind], uuidbuf);
9664                 ret = print_extent_state(info, subvolid);
9665                 goto close_out;
9666         }
9667         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9668
9669         if (!extent_buffer_uptodate(info->tree_root->node) ||
9670             !extent_buffer_uptodate(info->dev_root->node) ||
9671             !extent_buffer_uptodate(info->chunk_root->node)) {
9672                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9673                 ret = -EIO;
9674                 goto close_out;
9675         }
9676
9677         if (init_extent_tree || init_csum_tree) {
9678                 struct btrfs_trans_handle *trans;
9679
9680                 trans = btrfs_start_transaction(info->extent_root, 0);
9681                 if (IS_ERR(trans)) {
9682                         fprintf(stderr, "Error starting transaction\n");
9683                         ret = PTR_ERR(trans);
9684                         goto close_out;
9685                 }
9686
9687                 if (init_extent_tree) {
9688                         printf("Creating a new extent tree\n");
9689                         ret = reinit_extent_tree(trans, info);
9690                         if (ret)
9691                                 goto close_out;
9692                 }
9693
9694                 if (init_csum_tree) {
9695                         fprintf(stderr, "Reinit crc root\n");
9696                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9697                         if (ret) {
9698                                 fprintf(stderr, "crc root initialization failed\n");
9699                                 ret = -EIO;
9700                                 goto close_out;
9701                         }
9702
9703                         ret = fill_csum_tree(trans, info->csum_root,
9704                                              init_extent_tree);
9705                         if (ret) {
9706                                 fprintf(stderr, "crc refilling failed\n");
9707                                 return -EIO;
9708                         }
9709                 }
9710                 /*
9711                  * Ok now we commit and run the normal fsck, which will add
9712                  * extent entries for all of the items it finds.
9713                  */
9714                 ret = btrfs_commit_transaction(trans, info->extent_root);
9715                 if (ret)
9716                         goto close_out;
9717         }
9718         if (!extent_buffer_uptodate(info->extent_root->node)) {
9719                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9720                 ret = -EIO;
9721                 goto close_out;
9722         }
9723         if (!extent_buffer_uptodate(info->csum_root->node)) {
9724                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9725                 ret = -EIO;
9726                 goto close_out;
9727         }
9728
9729         if (!ctx.progress_enabled)
9730                 fprintf(stderr, "checking extents\n");
9731         ret = check_chunks_and_extents(root);
9732         if (ret)
9733                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9734
9735         ret = repair_root_items(info);
9736         if (ret < 0)
9737                 goto close_out;
9738         if (repair) {
9739                 fprintf(stderr, "Fixed %d roots.\n", ret);
9740                 ret = 0;
9741         } else if (ret > 0) {
9742                 fprintf(stderr,
9743                        "Found %d roots with an outdated root item.\n",
9744                        ret);
9745                 fprintf(stderr,
9746                         "Please run a filesystem check with the option --repair to fix them.\n");
9747                 ret = 1;
9748                 goto close_out;
9749         }
9750
9751         if (!ctx.progress_enabled) {
9752                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
9753                         fprintf(stderr, "checking free space tree\n");
9754                 else
9755                         fprintf(stderr, "checking free space cache\n");
9756         }
9757         ret = check_space_cache(root);
9758         if (ret)
9759                 goto out;
9760
9761         /*
9762          * We used to have to have these hole extents in between our real
9763          * extents so if we don't have this flag set we need to make sure there
9764          * are no gaps in the file extents for inodes, otherwise we can just
9765          * ignore it when this happens.
9766          */
9767         no_holes = btrfs_fs_incompat(root->fs_info,
9768                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9769         if (!ctx.progress_enabled)
9770                 fprintf(stderr, "checking fs roots\n");
9771         ret = check_fs_roots(root, &root_cache);
9772         if (ret)
9773                 goto out;
9774
9775         fprintf(stderr, "checking csums\n");
9776         ret = check_csums(root);
9777         if (ret)
9778                 goto out;
9779
9780         fprintf(stderr, "checking root refs\n");
9781         ret = check_root_refs(root, &root_cache);
9782         if (ret)
9783                 goto out;
9784
9785         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9786                 struct extent_buffer *eb;
9787
9788                 eb = list_first_entry(&root->fs_info->recow_ebs,
9789                                       struct extent_buffer, recow);
9790                 list_del_init(&eb->recow);
9791                 ret = recow_extent_buffer(root, eb);
9792                 if (ret)
9793                         break;
9794         }
9795
9796         while (!list_empty(&delete_items)) {
9797                 struct bad_item *bad;
9798
9799                 bad = list_first_entry(&delete_items, struct bad_item, list);
9800                 list_del_init(&bad->list);
9801                 if (repair)
9802                         ret = delete_bad_item(root, bad);
9803                 free(bad);
9804         }
9805
9806         if (info->quota_enabled) {
9807                 int err;
9808                 fprintf(stderr, "checking quota groups\n");
9809                 err = qgroup_verify_all(info);
9810                 if (err)
9811                         goto out;
9812         }
9813
9814         if (!list_empty(&root->fs_info->recow_ebs)) {
9815                 fprintf(stderr, "Transid errors in file system\n");
9816                 ret = 1;
9817         }
9818 out:
9819         print_qgroup_report(0);
9820         if (found_old_backref) { /*
9821                  * there was a disk format change when mixed
9822                  * backref was in testing tree. The old format
9823                  * existed about one week.
9824                  */
9825                 printf("\n * Found old mixed backref format. "
9826                        "The old format is not supported! *"
9827                        "\n * Please mount the FS in readonly mode, "
9828                        "backup data and re-format the FS. *\n\n");
9829                 ret = 1;
9830         }
9831         printf("found %llu bytes used err is %d\n",
9832                (unsigned long long)bytes_used, ret);
9833         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9834         printf("total tree bytes: %llu\n",
9835                (unsigned long long)total_btree_bytes);
9836         printf("total fs tree bytes: %llu\n",
9837                (unsigned long long)total_fs_tree_bytes);
9838         printf("total extent tree bytes: %llu\n",
9839                (unsigned long long)total_extent_tree_bytes);
9840         printf("btree space waste bytes: %llu\n",
9841                (unsigned long long)btree_space_waste);
9842         printf("file data blocks allocated: %llu\n referenced %llu\n",
9843                 (unsigned long long)data_bytes_allocated,
9844                 (unsigned long long)data_bytes_referenced);
9845
9846         free_root_recs_tree(&root_cache);
9847 close_out:
9848         close_ctree(root);
9849 err_out:
9850         if (ctx.progress_enabled)
9851                 task_deinit(ctx.info);
9852
9853         return ret;
9854 }