btrfs-progs: catch memory allocation failure in splice_shared_node
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "btrfsck.h"
39 #include "qgroup-verify.h"
40 #include "rbtree-utils.h"
41 #include "backref.h"
42 #include "ulist.h"
43
44 enum task_position {
45         TASK_EXTENTS,
46         TASK_FREE_SPACE,
47         TASK_FS_ROOTS,
48         TASK_NOTHING, /* have to be the last element */
49 };
50
51 struct task_ctx {
52         int progress_enabled;
53         enum task_position tp;
54
55         struct task_info *info;
56 };
57
58 static u64 bytes_used = 0;
59 static u64 total_csum_bytes = 0;
60 static u64 total_btree_bytes = 0;
61 static u64 total_fs_tree_bytes = 0;
62 static u64 total_extent_tree_bytes = 0;
63 static u64 btree_space_waste = 0;
64 static u64 data_bytes_allocated = 0;
65 static u64 data_bytes_referenced = 0;
66 static int found_old_backref = 0;
67 static LIST_HEAD(duplicate_extents);
68 static LIST_HEAD(delete_items);
69 static int repair = 0;
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75
76 static void *print_status_check(void *p)
77 {
78         struct task_ctx *priv = p;
79         const char work_indicator[] = { '.', 'o', 'O', 'o' };
80         uint32_t count = 0;
81         static char *task_position_string[] = {
82                 "checking extents",
83                 "checking free space cache",
84                 "checking fs roots",
85         };
86
87         task_period_start(priv->info, 1000 /* 1s */);
88
89         if (priv->tp == TASK_NOTHING)
90                 return NULL;
91
92         while (1) {
93                 printf("%s [%c]\r", task_position_string[priv->tp],
94                                 work_indicator[count % 4]);
95                 count++;
96                 fflush(stdout);
97                 task_period_wait(priv->info);
98         }
99         return NULL;
100 }
101
102 static int print_status_return(void *p)
103 {
104         printf("\n");
105         fflush(stdout);
106
107         return 0;
108 }
109
110 struct extent_backref {
111         struct list_head list;
112         unsigned int is_data:1;
113         unsigned int found_extent_tree:1;
114         unsigned int full_backref:1;
115         unsigned int found_ref:1;
116         unsigned int broken:1;
117 };
118
119 struct data_backref {
120         struct extent_backref node;
121         union {
122                 u64 parent;
123                 u64 root;
124         };
125         u64 owner;
126         u64 offset;
127         u64 disk_bytenr;
128         u64 bytes;
129         u64 ram_bytes;
130         u32 num_refs;
131         u32 found_ref;
132 };
133
134 /*
135  * Much like data_backref, just removed the undetermined members
136  * and change it to use list_head.
137  * During extent scan, it is stored in root->orphan_data_extent.
138  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
139  */
140 struct orphan_data_extent {
141         struct list_head list;
142         u64 root;
143         u64 objectid;
144         u64 offset;
145         u64 disk_bytenr;
146         u64 disk_len;
147 };
148
149 struct tree_backref {
150         struct extent_backref node;
151         union {
152                 u64 parent;
153                 u64 root;
154         };
155 };
156
157 struct extent_record {
158         struct list_head backrefs;
159         struct list_head dups;
160         struct list_head list;
161         struct cache_extent cache;
162         struct btrfs_disk_key parent_key;
163         u64 start;
164         u64 max_size;
165         u64 nr;
166         u64 refs;
167         u64 extent_item_refs;
168         u64 generation;
169         u64 parent_generation;
170         u64 info_objectid;
171         u32 num_duplicates;
172         u8 info_level;
173         int flag_block_full_backref;
174         unsigned int found_rec:1;
175         unsigned int content_checked:1;
176         unsigned int owner_ref_checked:1;
177         unsigned int is_root:1;
178         unsigned int metadata:1;
179         unsigned int bad_full_backref:1;
180         unsigned int crossing_stripes:1;
181         unsigned int wrong_chunk_type:1;
182 };
183
184 struct inode_backref {
185         struct list_head list;
186         unsigned int found_dir_item:1;
187         unsigned int found_dir_index:1;
188         unsigned int found_inode_ref:1;
189         unsigned int filetype:8;
190         int errors;
191         unsigned int ref_type;
192         u64 dir;
193         u64 index;
194         u16 namelen;
195         char name[0];
196 };
197
198 struct root_item_record {
199         struct list_head list;
200         u64 objectid;
201         u64 bytenr;
202         u64 last_snapshot;
203         u8 level;
204         u8 drop_level;
205         int level_size;
206         struct btrfs_key drop_key;
207 };
208
209 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
210 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
211 #define REF_ERR_NO_INODE_REF            (1 << 2)
212 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
213 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
214 #define REF_ERR_DUP_INODE_REF           (1 << 5)
215 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
216 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
217 #define REF_ERR_NAME_TOO_LONG           (1 << 8) // 100
218 #define REF_ERR_NO_ROOT_REF             (1 << 9)
219 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
220 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
221 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
222
223 struct file_extent_hole {
224         struct rb_node node;
225         u64 start;
226         u64 len;
227 };
228
229 /* Compatible function to allow reuse of old codes */
230 static u64 first_extent_gap(struct rb_root *holes)
231 {
232         struct file_extent_hole *hole;
233
234         if (RB_EMPTY_ROOT(holes))
235                 return (u64)-1;
236
237         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
238         return hole->start;
239 }
240
241 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
242 {
243         struct file_extent_hole *hole1;
244         struct file_extent_hole *hole2;
245
246         hole1 = rb_entry(node1, struct file_extent_hole, node);
247         hole2 = rb_entry(node2, struct file_extent_hole, node);
248
249         if (hole1->start > hole2->start)
250                 return -1;
251         if (hole1->start < hole2->start)
252                 return 1;
253         /* Now hole1->start == hole2->start */
254         if (hole1->len >= hole2->len)
255                 /*
256                  * Hole 1 will be merge center
257                  * Same hole will be merged later
258                  */
259                 return -1;
260         /* Hole 2 will be merge center */
261         return 1;
262 }
263
264 /*
265  * Add a hole to the record
266  *
267  * This will do hole merge for copy_file_extent_holes(),
268  * which will ensure there won't be continuous holes.
269  */
270 static int add_file_extent_hole(struct rb_root *holes,
271                                 u64 start, u64 len)
272 {
273         struct file_extent_hole *hole;
274         struct file_extent_hole *prev = NULL;
275         struct file_extent_hole *next = NULL;
276
277         hole = malloc(sizeof(*hole));
278         if (!hole)
279                 return -ENOMEM;
280         hole->start = start;
281         hole->len = len;
282         /* Since compare will not return 0, no -EEXIST will happen */
283         rb_insert(holes, &hole->node, compare_hole);
284
285         /* simple merge with previous hole */
286         if (rb_prev(&hole->node))
287                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
288                                 node);
289         if (prev && prev->start + prev->len >= hole->start) {
290                 hole->len = hole->start + hole->len - prev->start;
291                 hole->start = prev->start;
292                 rb_erase(&prev->node, holes);
293                 free(prev);
294                 prev = NULL;
295         }
296
297         /* iterate merge with next holes */
298         while (1) {
299                 if (!rb_next(&hole->node))
300                         break;
301                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
302                                         node);
303                 if (hole->start + hole->len >= next->start) {
304                         if (hole->start + hole->len <= next->start + next->len)
305                                 hole->len = next->start + next->len -
306                                             hole->start;
307                         rb_erase(&next->node, holes);
308                         free(next);
309                         next = NULL;
310                 } else
311                         break;
312         }
313         return 0;
314 }
315
316 static int compare_hole_range(struct rb_node *node, void *data)
317 {
318         struct file_extent_hole *hole;
319         u64 start;
320
321         hole = (struct file_extent_hole *)data;
322         start = hole->start;
323
324         hole = rb_entry(node, struct file_extent_hole, node);
325         if (start < hole->start)
326                 return -1;
327         if (start >= hole->start && start < hole->start + hole->len)
328                 return 0;
329         return 1;
330 }
331
332 /*
333  * Delete a hole in the record
334  *
335  * This will do the hole split and is much restrict than add.
336  */
337 static int del_file_extent_hole(struct rb_root *holes,
338                                 u64 start, u64 len)
339 {
340         struct file_extent_hole *hole;
341         struct file_extent_hole tmp;
342         u64 prev_start = 0;
343         u64 prev_len = 0;
344         u64 next_start = 0;
345         u64 next_len = 0;
346         struct rb_node *node;
347         int have_prev = 0;
348         int have_next = 0;
349         int ret = 0;
350
351         tmp.start = start;
352         tmp.len = len;
353         node = rb_search(holes, &tmp, compare_hole_range, NULL);
354         if (!node)
355                 return -EEXIST;
356         hole = rb_entry(node, struct file_extent_hole, node);
357         if (start + len > hole->start + hole->len)
358                 return -EEXIST;
359
360         /*
361          * Now there will be no overflap, delete the hole and re-add the
362          * split(s) if they exists.
363          */
364         if (start > hole->start) {
365                 prev_start = hole->start;
366                 prev_len = start - hole->start;
367                 have_prev = 1;
368         }
369         if (hole->start + hole->len > start + len) {
370                 next_start = start + len;
371                 next_len = hole->start + hole->len - start - len;
372                 have_next = 1;
373         }
374         rb_erase(node, holes);
375         free(hole);
376         if (have_prev) {
377                 ret = add_file_extent_hole(holes, prev_start, prev_len);
378                 if (ret < 0)
379                         return ret;
380         }
381         if (have_next) {
382                 ret = add_file_extent_hole(holes, next_start, next_len);
383                 if (ret < 0)
384                         return ret;
385         }
386         return 0;
387 }
388
389 static int copy_file_extent_holes(struct rb_root *dst,
390                                   struct rb_root *src)
391 {
392         struct file_extent_hole *hole;
393         struct rb_node *node;
394         int ret = 0;
395
396         node = rb_first(src);
397         while (node) {
398                 hole = rb_entry(node, struct file_extent_hole, node);
399                 ret = add_file_extent_hole(dst, hole->start, hole->len);
400                 if (ret)
401                         break;
402                 node = rb_next(node);
403         }
404         return ret;
405 }
406
407 static void free_file_extent_holes(struct rb_root *holes)
408 {
409         struct rb_node *node;
410         struct file_extent_hole *hole;
411
412         node = rb_first(holes);
413         while (node) {
414                 hole = rb_entry(node, struct file_extent_hole, node);
415                 rb_erase(node, holes);
416                 free(hole);
417                 node = rb_first(holes);
418         }
419 }
420
421 struct inode_record {
422         struct list_head backrefs;
423         unsigned int checked:1;
424         unsigned int merging:1;
425         unsigned int found_inode_item:1;
426         unsigned int found_dir_item:1;
427         unsigned int found_file_extent:1;
428         unsigned int found_csum_item:1;
429         unsigned int some_csum_missing:1;
430         unsigned int nodatasum:1;
431         int errors;
432
433         u64 ino;
434         u32 nlink;
435         u32 imode;
436         u64 isize;
437         u64 nbytes;
438
439         u32 found_link;
440         u64 found_size;
441         u64 extent_start;
442         u64 extent_end;
443         struct rb_root holes;
444         struct list_head orphan_extents;
445
446         u32 refs;
447 };
448
449 #define I_ERR_NO_INODE_ITEM             (1 << 0)
450 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
451 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
452 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
453 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
454 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
455 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
456 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
457 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) // 100
458 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
459 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) // 400
460 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
461 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
462 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
463 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
464
465 struct root_backref {
466         struct list_head list;
467         unsigned int found_dir_item:1;
468         unsigned int found_dir_index:1;
469         unsigned int found_back_ref:1;
470         unsigned int found_forward_ref:1;
471         unsigned int reachable:1;
472         int errors;
473         u64 ref_root;
474         u64 dir;
475         u64 index;
476         u16 namelen;
477         char name[0];
478 };
479
480 struct root_record {
481         struct list_head backrefs;
482         struct cache_extent cache;
483         unsigned int found_root_item:1;
484         u64 objectid;
485         u32 found_ref;
486 };
487
488 struct ptr_node {
489         struct cache_extent cache;
490         void *data;
491 };
492
493 struct shared_node {
494         struct cache_extent cache;
495         struct cache_tree root_cache;
496         struct cache_tree inode_cache;
497         struct inode_record *current;
498         u32 refs;
499 };
500
501 struct block_info {
502         u64 start;
503         u32 size;
504 };
505
506 struct walk_control {
507         struct cache_tree shared;
508         struct shared_node *nodes[BTRFS_MAX_LEVEL];
509         int active_node;
510         int root_level;
511 };
512
513 struct bad_item {
514         struct btrfs_key key;
515         u64 root_id;
516         struct list_head list;
517 };
518
519 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
520
521 static void record_root_in_trans(struct btrfs_trans_handle *trans,
522                                  struct btrfs_root *root)
523 {
524         if (root->last_trans != trans->transid) {
525                 root->track_dirty = 1;
526                 root->last_trans = trans->transid;
527                 root->commit_root = root->node;
528                 extent_buffer_get(root->node);
529         }
530 }
531
532 static u8 imode_to_type(u32 imode)
533 {
534 #define S_SHIFT 12
535         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
536                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
537                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
538                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
539                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
540                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
541                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
542                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
543         };
544
545         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
546 #undef S_SHIFT
547 }
548
549 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
550 {
551         struct device_record *rec1;
552         struct device_record *rec2;
553
554         rec1 = rb_entry(node1, struct device_record, node);
555         rec2 = rb_entry(node2, struct device_record, node);
556         if (rec1->devid > rec2->devid)
557                 return -1;
558         else if (rec1->devid < rec2->devid)
559                 return 1;
560         else
561                 return 0;
562 }
563
564 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
565 {
566         struct inode_record *rec;
567         struct inode_backref *backref;
568         struct inode_backref *orig;
569         struct inode_backref *tmp;
570         struct orphan_data_extent *src_orphan;
571         struct orphan_data_extent *dst_orphan;
572         size_t size;
573         int ret;
574
575         rec = malloc(sizeof(*rec));
576         if (!rec)
577                 return ERR_PTR(-ENOMEM);
578         memcpy(rec, orig_rec, sizeof(*rec));
579         rec->refs = 1;
580         INIT_LIST_HEAD(&rec->backrefs);
581         INIT_LIST_HEAD(&rec->orphan_extents);
582         rec->holes = RB_ROOT;
583
584         list_for_each_entry(orig, &orig_rec->backrefs, list) {
585                 size = sizeof(*orig) + orig->namelen + 1;
586                 backref = malloc(size);
587                 if (!backref) {
588                         ret = -ENOMEM;
589                         goto cleanup;
590                 }
591                 memcpy(backref, orig, size);
592                 list_add_tail(&backref->list, &rec->backrefs);
593         }
594         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
595                 dst_orphan = malloc(sizeof(*dst_orphan));
596                 if (!dst_orphan) {
597                         ret = -ENOMEM;
598                         goto cleanup;
599                 }
600                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
601                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
602         }
603         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
604         BUG_ON(ret < 0);
605
606         return rec;
607
608 cleanup:
609         if (!list_empty(&rec->backrefs))
610                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
611                         list_del(&orig->list);
612                         free(orig);
613                 }
614
615         if (!list_empty(&rec->orphan_extents))
616                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
617                         list_del(&orig->list);
618                         free(orig);
619                 }
620
621         free(rec);
622
623         return ERR_PTR(ret);
624 }
625
626 static void print_orphan_data_extents(struct list_head *orphan_extents,
627                                       u64 objectid)
628 {
629         struct orphan_data_extent *orphan;
630
631         if (list_empty(orphan_extents))
632                 return;
633         printf("The following data extent is lost in tree %llu:\n",
634                objectid);
635         list_for_each_entry(orphan, orphan_extents, list) {
636                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
637                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
638                        orphan->disk_len);
639         }
640 }
641
642 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
643 {
644         u64 root_objectid = root->root_key.objectid;
645         int errors = rec->errors;
646
647         if (!errors)
648                 return;
649         /* reloc root errors, we print its corresponding fs root objectid*/
650         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
651                 root_objectid = root->root_key.offset;
652                 fprintf(stderr, "reloc");
653         }
654         fprintf(stderr, "root %llu inode %llu errors %x",
655                 (unsigned long long) root_objectid,
656                 (unsigned long long) rec->ino, rec->errors);
657
658         if (errors & I_ERR_NO_INODE_ITEM)
659                 fprintf(stderr, ", no inode item");
660         if (errors & I_ERR_NO_ORPHAN_ITEM)
661                 fprintf(stderr, ", no orphan item");
662         if (errors & I_ERR_DUP_INODE_ITEM)
663                 fprintf(stderr, ", dup inode item");
664         if (errors & I_ERR_DUP_DIR_INDEX)
665                 fprintf(stderr, ", dup dir index");
666         if (errors & I_ERR_ODD_DIR_ITEM)
667                 fprintf(stderr, ", odd dir item");
668         if (errors & I_ERR_ODD_FILE_EXTENT)
669                 fprintf(stderr, ", odd file extent");
670         if (errors & I_ERR_BAD_FILE_EXTENT)
671                 fprintf(stderr, ", bad file extent");
672         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
673                 fprintf(stderr, ", file extent overlap");
674         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
675                 fprintf(stderr, ", file extent discount");
676         if (errors & I_ERR_DIR_ISIZE_WRONG)
677                 fprintf(stderr, ", dir isize wrong");
678         if (errors & I_ERR_FILE_NBYTES_WRONG)
679                 fprintf(stderr, ", nbytes wrong");
680         if (errors & I_ERR_ODD_CSUM_ITEM)
681                 fprintf(stderr, ", odd csum item");
682         if (errors & I_ERR_SOME_CSUM_MISSING)
683                 fprintf(stderr, ", some csum missing");
684         if (errors & I_ERR_LINK_COUNT_WRONG)
685                 fprintf(stderr, ", link count wrong");
686         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
687                 fprintf(stderr, ", orphan file extent");
688         fprintf(stderr, "\n");
689         /* Print the orphan extents if needed */
690         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
691                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
692
693         /* Print the holes if needed */
694         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
695                 struct file_extent_hole *hole;
696                 struct rb_node *node;
697                 int found = 0;
698
699                 node = rb_first(&rec->holes);
700                 fprintf(stderr, "Found file extent holes:\n");
701                 while (node) {
702                         found = 1;
703                         hole = rb_entry(node, struct file_extent_hole, node);
704                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
705                                 hole->start, hole->len);
706                         node = rb_next(node);
707                 }
708                 if (!found)
709                         fprintf(stderr, "\tstart: 0, len: %llu\n",
710                                 round_up(rec->isize, root->sectorsize));
711         }
712 }
713
714 static void print_ref_error(int errors)
715 {
716         if (errors & REF_ERR_NO_DIR_ITEM)
717                 fprintf(stderr, ", no dir item");
718         if (errors & REF_ERR_NO_DIR_INDEX)
719                 fprintf(stderr, ", no dir index");
720         if (errors & REF_ERR_NO_INODE_REF)
721                 fprintf(stderr, ", no inode ref");
722         if (errors & REF_ERR_DUP_DIR_ITEM)
723                 fprintf(stderr, ", dup dir item");
724         if (errors & REF_ERR_DUP_DIR_INDEX)
725                 fprintf(stderr, ", dup dir index");
726         if (errors & REF_ERR_DUP_INODE_REF)
727                 fprintf(stderr, ", dup inode ref");
728         if (errors & REF_ERR_INDEX_UNMATCH)
729                 fprintf(stderr, ", index unmatch");
730         if (errors & REF_ERR_FILETYPE_UNMATCH)
731                 fprintf(stderr, ", filetype unmatch");
732         if (errors & REF_ERR_NAME_TOO_LONG)
733                 fprintf(stderr, ", name too long");
734         if (errors & REF_ERR_NO_ROOT_REF)
735                 fprintf(stderr, ", no root ref");
736         if (errors & REF_ERR_NO_ROOT_BACKREF)
737                 fprintf(stderr, ", no root backref");
738         if (errors & REF_ERR_DUP_ROOT_REF)
739                 fprintf(stderr, ", dup root ref");
740         if (errors & REF_ERR_DUP_ROOT_BACKREF)
741                 fprintf(stderr, ", dup root backref");
742         fprintf(stderr, "\n");
743 }
744
745 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
746                                           u64 ino, int mod)
747 {
748         struct ptr_node *node;
749         struct cache_extent *cache;
750         struct inode_record *rec = NULL;
751         int ret;
752
753         cache = lookup_cache_extent(inode_cache, ino, 1);
754         if (cache) {
755                 node = container_of(cache, struct ptr_node, cache);
756                 rec = node->data;
757                 if (mod && rec->refs > 1) {
758                         node->data = clone_inode_rec(rec);
759                         if (IS_ERR(node->data))
760                                 return node->data;
761                         rec->refs--;
762                         rec = node->data;
763                 }
764         } else if (mod) {
765                 rec = calloc(1, sizeof(*rec));
766                 if (!rec)
767                         return ERR_PTR(-ENOMEM);
768                 rec->ino = ino;
769                 rec->extent_start = (u64)-1;
770                 rec->refs = 1;
771                 INIT_LIST_HEAD(&rec->backrefs);
772                 INIT_LIST_HEAD(&rec->orphan_extents);
773                 rec->holes = RB_ROOT;
774
775                 node = malloc(sizeof(*node));
776                 if (!node) {
777                         free(rec);
778                         return ERR_PTR(-ENOMEM);
779                 }
780                 node->cache.start = ino;
781                 node->cache.size = 1;
782                 node->data = rec;
783
784                 if (ino == BTRFS_FREE_INO_OBJECTID)
785                         rec->found_link = 1;
786
787                 ret = insert_cache_extent(inode_cache, &node->cache);
788                 if (ret)
789                         return ERR_PTR(-EEXIST);
790         }
791         return rec;
792 }
793
794 static void free_orphan_data_extents(struct list_head *orphan_extents)
795 {
796         struct orphan_data_extent *orphan;
797
798         while (!list_empty(orphan_extents)) {
799                 orphan = list_entry(orphan_extents->next,
800                                     struct orphan_data_extent, list);
801                 list_del(&orphan->list);
802                 free(orphan);
803         }
804 }
805
806 static void free_inode_rec(struct inode_record *rec)
807 {
808         struct inode_backref *backref;
809
810         if (--rec->refs > 0)
811                 return;
812
813         while (!list_empty(&rec->backrefs)) {
814                 backref = list_entry(rec->backrefs.next,
815                                      struct inode_backref, list);
816                 list_del(&backref->list);
817                 free(backref);
818         }
819         free_orphan_data_extents(&rec->orphan_extents);
820         free_file_extent_holes(&rec->holes);
821         free(rec);
822 }
823
824 static int can_free_inode_rec(struct inode_record *rec)
825 {
826         if (!rec->errors && rec->checked && rec->found_inode_item &&
827             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
828                 return 1;
829         return 0;
830 }
831
832 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
833                                  struct inode_record *rec)
834 {
835         struct cache_extent *cache;
836         struct inode_backref *tmp, *backref;
837         struct ptr_node *node;
838         unsigned char filetype;
839
840         if (!rec->found_inode_item)
841                 return;
842
843         filetype = imode_to_type(rec->imode);
844         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
845                 if (backref->found_dir_item && backref->found_dir_index) {
846                         if (backref->filetype != filetype)
847                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
848                         if (!backref->errors && backref->found_inode_ref &&
849                             rec->nlink == rec->found_link) {
850                                 list_del(&backref->list);
851                                 free(backref);
852                         }
853                 }
854         }
855
856         if (!rec->checked || rec->merging)
857                 return;
858
859         if (S_ISDIR(rec->imode)) {
860                 if (rec->found_size != rec->isize)
861                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
862                 if (rec->found_file_extent)
863                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
864         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
865                 if (rec->found_dir_item)
866                         rec->errors |= I_ERR_ODD_DIR_ITEM;
867                 if (rec->found_size != rec->nbytes)
868                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
869                 if (rec->nlink > 0 && !no_holes &&
870                     (rec->extent_end < rec->isize ||
871                      first_extent_gap(&rec->holes) < rec->isize))
872                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
873         }
874
875         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
876                 if (rec->found_csum_item && rec->nodatasum)
877                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
878                 if (rec->some_csum_missing && !rec->nodatasum)
879                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
880         }
881
882         BUG_ON(rec->refs != 1);
883         if (can_free_inode_rec(rec)) {
884                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
885                 node = container_of(cache, struct ptr_node, cache);
886                 BUG_ON(node->data != rec);
887                 remove_cache_extent(inode_cache, &node->cache);
888                 free(node);
889                 free_inode_rec(rec);
890         }
891 }
892
893 static int check_orphan_item(struct btrfs_root *root, u64 ino)
894 {
895         struct btrfs_path path;
896         struct btrfs_key key;
897         int ret;
898
899         key.objectid = BTRFS_ORPHAN_OBJECTID;
900         key.type = BTRFS_ORPHAN_ITEM_KEY;
901         key.offset = ino;
902
903         btrfs_init_path(&path);
904         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
905         btrfs_release_path(&path);
906         if (ret > 0)
907                 ret = -ENOENT;
908         return ret;
909 }
910
911 static int process_inode_item(struct extent_buffer *eb,
912                               int slot, struct btrfs_key *key,
913                               struct shared_node *active_node)
914 {
915         struct inode_record *rec;
916         struct btrfs_inode_item *item;
917
918         rec = active_node->current;
919         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
920         if (rec->found_inode_item) {
921                 rec->errors |= I_ERR_DUP_INODE_ITEM;
922                 return 1;
923         }
924         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
925         rec->nlink = btrfs_inode_nlink(eb, item);
926         rec->isize = btrfs_inode_size(eb, item);
927         rec->nbytes = btrfs_inode_nbytes(eb, item);
928         rec->imode = btrfs_inode_mode(eb, item);
929         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
930                 rec->nodatasum = 1;
931         rec->found_inode_item = 1;
932         if (rec->nlink == 0)
933                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
934         maybe_free_inode_rec(&active_node->inode_cache, rec);
935         return 0;
936 }
937
938 static struct inode_backref *get_inode_backref(struct inode_record *rec,
939                                                 const char *name,
940                                                 int namelen, u64 dir)
941 {
942         struct inode_backref *backref;
943
944         list_for_each_entry(backref, &rec->backrefs, list) {
945                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
946                         break;
947                 if (backref->dir != dir || backref->namelen != namelen)
948                         continue;
949                 if (memcmp(name, backref->name, namelen))
950                         continue;
951                 return backref;
952         }
953
954         backref = malloc(sizeof(*backref) + namelen + 1);
955         if (!backref)
956                 return NULL;
957         memset(backref, 0, sizeof(*backref));
958         backref->dir = dir;
959         backref->namelen = namelen;
960         memcpy(backref->name, name, namelen);
961         backref->name[namelen] = '\0';
962         list_add_tail(&backref->list, &rec->backrefs);
963         return backref;
964 }
965
966 static int add_inode_backref(struct cache_tree *inode_cache,
967                              u64 ino, u64 dir, u64 index,
968                              const char *name, int namelen,
969                              int filetype, int itemtype, int errors)
970 {
971         struct inode_record *rec;
972         struct inode_backref *backref;
973
974         rec = get_inode_rec(inode_cache, ino, 1);
975         BUG_ON(IS_ERR(rec));
976         backref = get_inode_backref(rec, name, namelen, dir);
977         BUG_ON(!backref);
978         if (errors)
979                 backref->errors |= errors;
980         if (itemtype == BTRFS_DIR_INDEX_KEY) {
981                 if (backref->found_dir_index)
982                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
983                 if (backref->found_inode_ref && backref->index != index)
984                         backref->errors |= REF_ERR_INDEX_UNMATCH;
985                 if (backref->found_dir_item && backref->filetype != filetype)
986                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
987
988                 backref->index = index;
989                 backref->filetype = filetype;
990                 backref->found_dir_index = 1;
991         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
992                 rec->found_link++;
993                 if (backref->found_dir_item)
994                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
995                 if (backref->found_dir_index && backref->filetype != filetype)
996                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
997
998                 backref->filetype = filetype;
999                 backref->found_dir_item = 1;
1000         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1001                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1002                 if (backref->found_inode_ref)
1003                         backref->errors |= REF_ERR_DUP_INODE_REF;
1004                 if (backref->found_dir_index && backref->index != index)
1005                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1006                 else
1007                         backref->index = index;
1008
1009                 backref->ref_type = itemtype;
1010                 backref->found_inode_ref = 1;
1011         } else {
1012                 BUG_ON(1);
1013         }
1014
1015         maybe_free_inode_rec(inode_cache, rec);
1016         return 0;
1017 }
1018
1019 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1020                             struct cache_tree *dst_cache)
1021 {
1022         struct inode_backref *backref;
1023         u32 dir_count = 0;
1024         int ret = 0;
1025
1026         dst->merging = 1;
1027         list_for_each_entry(backref, &src->backrefs, list) {
1028                 if (backref->found_dir_index) {
1029                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1030                                         backref->index, backref->name,
1031                                         backref->namelen, backref->filetype,
1032                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1033                 }
1034                 if (backref->found_dir_item) {
1035                         dir_count++;
1036                         add_inode_backref(dst_cache, dst->ino,
1037                                         backref->dir, 0, backref->name,
1038                                         backref->namelen, backref->filetype,
1039                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1040                 }
1041                 if (backref->found_inode_ref) {
1042                         add_inode_backref(dst_cache, dst->ino,
1043                                         backref->dir, backref->index,
1044                                         backref->name, backref->namelen, 0,
1045                                         backref->ref_type, backref->errors);
1046                 }
1047         }
1048
1049         if (src->found_dir_item)
1050                 dst->found_dir_item = 1;
1051         if (src->found_file_extent)
1052                 dst->found_file_extent = 1;
1053         if (src->found_csum_item)
1054                 dst->found_csum_item = 1;
1055         if (src->some_csum_missing)
1056                 dst->some_csum_missing = 1;
1057         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1058                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1059                 if (ret < 0)
1060                         return ret;
1061         }
1062
1063         BUG_ON(src->found_link < dir_count);
1064         dst->found_link += src->found_link - dir_count;
1065         dst->found_size += src->found_size;
1066         if (src->extent_start != (u64)-1) {
1067                 if (dst->extent_start == (u64)-1) {
1068                         dst->extent_start = src->extent_start;
1069                         dst->extent_end = src->extent_end;
1070                 } else {
1071                         if (dst->extent_end > src->extent_start)
1072                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1073                         else if (dst->extent_end < src->extent_start) {
1074                                 ret = add_file_extent_hole(&dst->holes,
1075                                         dst->extent_end,
1076                                         src->extent_start - dst->extent_end);
1077                         }
1078                         if (dst->extent_end < src->extent_end)
1079                                 dst->extent_end = src->extent_end;
1080                 }
1081         }
1082
1083         dst->errors |= src->errors;
1084         if (src->found_inode_item) {
1085                 if (!dst->found_inode_item) {
1086                         dst->nlink = src->nlink;
1087                         dst->isize = src->isize;
1088                         dst->nbytes = src->nbytes;
1089                         dst->imode = src->imode;
1090                         dst->nodatasum = src->nodatasum;
1091                         dst->found_inode_item = 1;
1092                 } else {
1093                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1094                 }
1095         }
1096         dst->merging = 0;
1097
1098         return 0;
1099 }
1100
1101 static int splice_shared_node(struct shared_node *src_node,
1102                               struct shared_node *dst_node)
1103 {
1104         struct cache_extent *cache;
1105         struct ptr_node *node, *ins;
1106         struct cache_tree *src, *dst;
1107         struct inode_record *rec, *conflict;
1108         u64 current_ino = 0;
1109         int splice = 0;
1110         int ret;
1111
1112         if (--src_node->refs == 0)
1113                 splice = 1;
1114         if (src_node->current)
1115                 current_ino = src_node->current->ino;
1116
1117         src = &src_node->root_cache;
1118         dst = &dst_node->root_cache;
1119 again:
1120         cache = search_cache_extent(src, 0);
1121         while (cache) {
1122                 node = container_of(cache, struct ptr_node, cache);
1123                 rec = node->data;
1124                 cache = next_cache_extent(cache);
1125
1126                 if (splice) {
1127                         remove_cache_extent(src, &node->cache);
1128                         ins = node;
1129                 } else {
1130                         ins = malloc(sizeof(*ins));
1131                         BUG_ON(!ins);
1132                         ins->cache.start = node->cache.start;
1133                         ins->cache.size = node->cache.size;
1134                         ins->data = rec;
1135                         rec->refs++;
1136                 }
1137                 ret = insert_cache_extent(dst, &ins->cache);
1138                 if (ret == -EEXIST) {
1139                         conflict = get_inode_rec(dst, rec->ino, 1);
1140                         BUG_ON(IS_ERR(conflict));
1141                         merge_inode_recs(rec, conflict, dst);
1142                         if (rec->checked) {
1143                                 conflict->checked = 1;
1144                                 if (dst_node->current == conflict)
1145                                         dst_node->current = NULL;
1146                         }
1147                         maybe_free_inode_rec(dst, conflict);
1148                         free_inode_rec(rec);
1149                         free(ins);
1150                 } else {
1151                         BUG_ON(ret);
1152                 }
1153         }
1154
1155         if (src == &src_node->root_cache) {
1156                 src = &src_node->inode_cache;
1157                 dst = &dst_node->inode_cache;
1158                 goto again;
1159         }
1160
1161         if (current_ino > 0 && (!dst_node->current ||
1162             current_ino > dst_node->current->ino)) {
1163                 if (dst_node->current) {
1164                         dst_node->current->checked = 1;
1165                         maybe_free_inode_rec(dst, dst_node->current);
1166                 }
1167                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1168                 BUG_ON(IS_ERR(dst_node->current));
1169         }
1170         return 0;
1171 }
1172
1173 static void free_inode_ptr(struct cache_extent *cache)
1174 {
1175         struct ptr_node *node;
1176         struct inode_record *rec;
1177
1178         node = container_of(cache, struct ptr_node, cache);
1179         rec = node->data;
1180         free_inode_rec(rec);
1181         free(node);
1182 }
1183
1184 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1185
1186 static struct shared_node *find_shared_node(struct cache_tree *shared,
1187                                             u64 bytenr)
1188 {
1189         struct cache_extent *cache;
1190         struct shared_node *node;
1191
1192         cache = lookup_cache_extent(shared, bytenr, 1);
1193         if (cache) {
1194                 node = container_of(cache, struct shared_node, cache);
1195                 return node;
1196         }
1197         return NULL;
1198 }
1199
1200 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1201 {
1202         int ret;
1203         struct shared_node *node;
1204
1205         node = calloc(1, sizeof(*node));
1206         if (!node)
1207                 return -ENOMEM;
1208         node->cache.start = bytenr;
1209         node->cache.size = 1;
1210         cache_tree_init(&node->root_cache);
1211         cache_tree_init(&node->inode_cache);
1212         node->refs = refs;
1213
1214         ret = insert_cache_extent(shared, &node->cache);
1215
1216         return ret;
1217 }
1218
1219 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1220                              struct walk_control *wc, int level)
1221 {
1222         struct shared_node *node;
1223         struct shared_node *dest;
1224         int ret;
1225
1226         if (level == wc->active_node)
1227                 return 0;
1228
1229         BUG_ON(wc->active_node <= level);
1230         node = find_shared_node(&wc->shared, bytenr);
1231         if (!node) {
1232                 ret = add_shared_node(&wc->shared, bytenr, refs);
1233                 BUG_ON(ret);
1234                 node = find_shared_node(&wc->shared, bytenr);
1235                 wc->nodes[level] = node;
1236                 wc->active_node = level;
1237                 return 0;
1238         }
1239
1240         if (wc->root_level == wc->active_node &&
1241             btrfs_root_refs(&root->root_item) == 0) {
1242                 if (--node->refs == 0) {
1243                         free_inode_recs_tree(&node->root_cache);
1244                         free_inode_recs_tree(&node->inode_cache);
1245                         remove_cache_extent(&wc->shared, &node->cache);
1246                         free(node);
1247                 }
1248                 return 1;
1249         }
1250
1251         dest = wc->nodes[wc->active_node];
1252         splice_shared_node(node, dest);
1253         if (node->refs == 0) {
1254                 remove_cache_extent(&wc->shared, &node->cache);
1255                 free(node);
1256         }
1257         return 1;
1258 }
1259
1260 static int leave_shared_node(struct btrfs_root *root,
1261                              struct walk_control *wc, int level)
1262 {
1263         struct shared_node *node;
1264         struct shared_node *dest;
1265         int i;
1266
1267         if (level == wc->root_level)
1268                 return 0;
1269
1270         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1271                 if (wc->nodes[i])
1272                         break;
1273         }
1274         BUG_ON(i >= BTRFS_MAX_LEVEL);
1275
1276         node = wc->nodes[wc->active_node];
1277         wc->nodes[wc->active_node] = NULL;
1278         wc->active_node = i;
1279
1280         dest = wc->nodes[wc->active_node];
1281         if (wc->active_node < wc->root_level ||
1282             btrfs_root_refs(&root->root_item) > 0) {
1283                 BUG_ON(node->refs <= 1);
1284                 splice_shared_node(node, dest);
1285         } else {
1286                 BUG_ON(node->refs < 2);
1287                 node->refs--;
1288         }
1289         return 0;
1290 }
1291
1292 /*
1293  * Returns:
1294  * < 0 - on error
1295  * 1   - if the root with id child_root_id is a child of root parent_root_id
1296  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1297  *       has other root(s) as parent(s)
1298  * 2   - if the root child_root_id doesn't have any parent roots
1299  */
1300 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1301                          u64 child_root_id)
1302 {
1303         struct btrfs_path path;
1304         struct btrfs_key key;
1305         struct extent_buffer *leaf;
1306         int has_parent = 0;
1307         int ret;
1308
1309         btrfs_init_path(&path);
1310
1311         key.objectid = parent_root_id;
1312         key.type = BTRFS_ROOT_REF_KEY;
1313         key.offset = child_root_id;
1314         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1315                                 0, 0);
1316         if (ret < 0)
1317                 return ret;
1318         btrfs_release_path(&path);
1319         if (!ret)
1320                 return 1;
1321
1322         key.objectid = child_root_id;
1323         key.type = BTRFS_ROOT_BACKREF_KEY;
1324         key.offset = 0;
1325         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1326                                 0, 0);
1327         if (ret < 0)
1328                 goto out;
1329
1330         while (1) {
1331                 leaf = path.nodes[0];
1332                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1333                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1334                         if (ret)
1335                                 break;
1336                         leaf = path.nodes[0];
1337                 }
1338
1339                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1340                 if (key.objectid != child_root_id ||
1341                     key.type != BTRFS_ROOT_BACKREF_KEY)
1342                         break;
1343
1344                 has_parent = 1;
1345
1346                 if (key.offset == parent_root_id) {
1347                         btrfs_release_path(&path);
1348                         return 1;
1349                 }
1350
1351                 path.slots[0]++;
1352         }
1353 out:
1354         btrfs_release_path(&path);
1355         if (ret < 0)
1356                 return ret;
1357         return has_parent ? 0 : 2;
1358 }
1359
1360 static int process_dir_item(struct btrfs_root *root,
1361                             struct extent_buffer *eb,
1362                             int slot, struct btrfs_key *key,
1363                             struct shared_node *active_node)
1364 {
1365         u32 total;
1366         u32 cur = 0;
1367         u32 len;
1368         u32 name_len;
1369         u32 data_len;
1370         int error;
1371         int nritems = 0;
1372         int filetype;
1373         struct btrfs_dir_item *di;
1374         struct inode_record *rec;
1375         struct cache_tree *root_cache;
1376         struct cache_tree *inode_cache;
1377         struct btrfs_key location;
1378         char namebuf[BTRFS_NAME_LEN];
1379
1380         root_cache = &active_node->root_cache;
1381         inode_cache = &active_node->inode_cache;
1382         rec = active_node->current;
1383         rec->found_dir_item = 1;
1384
1385         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1386         total = btrfs_item_size_nr(eb, slot);
1387         while (cur < total) {
1388                 nritems++;
1389                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1390                 name_len = btrfs_dir_name_len(eb, di);
1391                 data_len = btrfs_dir_data_len(eb, di);
1392                 filetype = btrfs_dir_type(eb, di);
1393
1394                 rec->found_size += name_len;
1395                 if (name_len <= BTRFS_NAME_LEN) {
1396                         len = name_len;
1397                         error = 0;
1398                 } else {
1399                         len = BTRFS_NAME_LEN;
1400                         error = REF_ERR_NAME_TOO_LONG;
1401                 }
1402                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1403
1404                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1405                         add_inode_backref(inode_cache, location.objectid,
1406                                           key->objectid, key->offset, namebuf,
1407                                           len, filetype, key->type, error);
1408                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1409                         add_inode_backref(root_cache, location.objectid,
1410                                           key->objectid, key->offset,
1411                                           namebuf, len, filetype,
1412                                           key->type, error);
1413                 } else {
1414                         fprintf(stderr, "invalid location in dir item %u\n",
1415                                 location.type);
1416                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1417                                           key->objectid, key->offset, namebuf,
1418                                           len, filetype, key->type, error);
1419                 }
1420
1421                 len = sizeof(*di) + name_len + data_len;
1422                 di = (struct btrfs_dir_item *)((char *)di + len);
1423                 cur += len;
1424         }
1425         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1426                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1427
1428         return 0;
1429 }
1430
1431 static int process_inode_ref(struct extent_buffer *eb,
1432                              int slot, struct btrfs_key *key,
1433                              struct shared_node *active_node)
1434 {
1435         u32 total;
1436         u32 cur = 0;
1437         u32 len;
1438         u32 name_len;
1439         u64 index;
1440         int error;
1441         struct cache_tree *inode_cache;
1442         struct btrfs_inode_ref *ref;
1443         char namebuf[BTRFS_NAME_LEN];
1444
1445         inode_cache = &active_node->inode_cache;
1446
1447         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1448         total = btrfs_item_size_nr(eb, slot);
1449         while (cur < total) {
1450                 name_len = btrfs_inode_ref_name_len(eb, ref);
1451                 index = btrfs_inode_ref_index(eb, ref);
1452                 if (name_len <= BTRFS_NAME_LEN) {
1453                         len = name_len;
1454                         error = 0;
1455                 } else {
1456                         len = BTRFS_NAME_LEN;
1457                         error = REF_ERR_NAME_TOO_LONG;
1458                 }
1459                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1460                 add_inode_backref(inode_cache, key->objectid, key->offset,
1461                                   index, namebuf, len, 0, key->type, error);
1462
1463                 len = sizeof(*ref) + name_len;
1464                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1465                 cur += len;
1466         }
1467         return 0;
1468 }
1469
1470 static int process_inode_extref(struct extent_buffer *eb,
1471                                 int slot, struct btrfs_key *key,
1472                                 struct shared_node *active_node)
1473 {
1474         u32 total;
1475         u32 cur = 0;
1476         u32 len;
1477         u32 name_len;
1478         u64 index;
1479         u64 parent;
1480         int error;
1481         struct cache_tree *inode_cache;
1482         struct btrfs_inode_extref *extref;
1483         char namebuf[BTRFS_NAME_LEN];
1484
1485         inode_cache = &active_node->inode_cache;
1486
1487         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1488         total = btrfs_item_size_nr(eb, slot);
1489         while (cur < total) {
1490                 name_len = btrfs_inode_extref_name_len(eb, extref);
1491                 index = btrfs_inode_extref_index(eb, extref);
1492                 parent = btrfs_inode_extref_parent(eb, extref);
1493                 if (name_len <= BTRFS_NAME_LEN) {
1494                         len = name_len;
1495                         error = 0;
1496                 } else {
1497                         len = BTRFS_NAME_LEN;
1498                         error = REF_ERR_NAME_TOO_LONG;
1499                 }
1500                 read_extent_buffer(eb, namebuf,
1501                                    (unsigned long)(extref + 1), len);
1502                 add_inode_backref(inode_cache, key->objectid, parent,
1503                                   index, namebuf, len, 0, key->type, error);
1504
1505                 len = sizeof(*extref) + name_len;
1506                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1507                 cur += len;
1508         }
1509         return 0;
1510
1511 }
1512
1513 static int count_csum_range(struct btrfs_root *root, u64 start,
1514                             u64 len, u64 *found)
1515 {
1516         struct btrfs_key key;
1517         struct btrfs_path path;
1518         struct extent_buffer *leaf;
1519         int ret;
1520         size_t size;
1521         *found = 0;
1522         u64 csum_end;
1523         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1524
1525         btrfs_init_path(&path);
1526
1527         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1528         key.offset = start;
1529         key.type = BTRFS_EXTENT_CSUM_KEY;
1530
1531         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1532                                 &key, &path, 0, 0);
1533         if (ret < 0)
1534                 goto out;
1535         if (ret > 0 && path.slots[0] > 0) {
1536                 leaf = path.nodes[0];
1537                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1538                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1539                     key.type == BTRFS_EXTENT_CSUM_KEY)
1540                         path.slots[0]--;
1541         }
1542
1543         while (len > 0) {
1544                 leaf = path.nodes[0];
1545                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1546                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1547                         if (ret > 0)
1548                                 break;
1549                         else if (ret < 0)
1550                                 goto out;
1551                         leaf = path.nodes[0];
1552                 }
1553
1554                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1555                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1556                     key.type != BTRFS_EXTENT_CSUM_KEY)
1557                         break;
1558
1559                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1560                 if (key.offset >= start + len)
1561                         break;
1562
1563                 if (key.offset > start)
1564                         start = key.offset;
1565
1566                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1567                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1568                 if (csum_end > start) {
1569                         size = min(csum_end - start, len);
1570                         len -= size;
1571                         start += size;
1572                         *found += size;
1573                 }
1574
1575                 path.slots[0]++;
1576         }
1577 out:
1578         btrfs_release_path(&path);
1579         if (ret < 0)
1580                 return ret;
1581         return 0;
1582 }
1583
1584 static int process_file_extent(struct btrfs_root *root,
1585                                 struct extent_buffer *eb,
1586                                 int slot, struct btrfs_key *key,
1587                                 struct shared_node *active_node)
1588 {
1589         struct inode_record *rec;
1590         struct btrfs_file_extent_item *fi;
1591         u64 num_bytes = 0;
1592         u64 disk_bytenr = 0;
1593         u64 extent_offset = 0;
1594         u64 mask = root->sectorsize - 1;
1595         int extent_type;
1596         int ret;
1597
1598         rec = active_node->current;
1599         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1600         rec->found_file_extent = 1;
1601
1602         if (rec->extent_start == (u64)-1) {
1603                 rec->extent_start = key->offset;
1604                 rec->extent_end = key->offset;
1605         }
1606
1607         if (rec->extent_end > key->offset)
1608                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1609         else if (rec->extent_end < key->offset) {
1610                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1611                                            key->offset - rec->extent_end);
1612                 if (ret < 0)
1613                         return ret;
1614         }
1615
1616         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1617         extent_type = btrfs_file_extent_type(eb, fi);
1618
1619         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1620                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1621                 if (num_bytes == 0)
1622                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1623                 rec->found_size += num_bytes;
1624                 num_bytes = (num_bytes + mask) & ~mask;
1625         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1626                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1627                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1628                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1629                 extent_offset = btrfs_file_extent_offset(eb, fi);
1630                 if (num_bytes == 0 || (num_bytes & mask))
1631                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1632                 if (num_bytes + extent_offset >
1633                     btrfs_file_extent_ram_bytes(eb, fi))
1634                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1635                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1636                     (btrfs_file_extent_compression(eb, fi) ||
1637                      btrfs_file_extent_encryption(eb, fi) ||
1638                      btrfs_file_extent_other_encoding(eb, fi)))
1639                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1640                 if (disk_bytenr > 0)
1641                         rec->found_size += num_bytes;
1642         } else {
1643                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1644         }
1645         rec->extent_end = key->offset + num_bytes;
1646
1647         /*
1648          * The data reloc tree will copy full extents into its inode and then
1649          * copy the corresponding csums.  Because the extent it copied could be
1650          * a preallocated extent that hasn't been written to yet there may be no
1651          * csums to copy, ergo we won't have csums for our file extent.  This is
1652          * ok so just don't bother checking csums if the inode belongs to the
1653          * data reloc tree.
1654          */
1655         if (disk_bytenr > 0 &&
1656             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1657                 u64 found;
1658                 if (btrfs_file_extent_compression(eb, fi))
1659                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1660                 else
1661                         disk_bytenr += extent_offset;
1662
1663                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1664                 if (ret < 0)
1665                         return ret;
1666                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1667                         if (found > 0)
1668                                 rec->found_csum_item = 1;
1669                         if (found < num_bytes)
1670                                 rec->some_csum_missing = 1;
1671                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1672                         if (found > 0)
1673                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1674                 }
1675         }
1676         return 0;
1677 }
1678
1679 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1680                             struct walk_control *wc)
1681 {
1682         struct btrfs_key key;
1683         u32 nritems;
1684         int i;
1685         int ret = 0;
1686         struct cache_tree *inode_cache;
1687         struct shared_node *active_node;
1688
1689         if (wc->root_level == wc->active_node &&
1690             btrfs_root_refs(&root->root_item) == 0)
1691                 return 0;
1692
1693         active_node = wc->nodes[wc->active_node];
1694         inode_cache = &active_node->inode_cache;
1695         nritems = btrfs_header_nritems(eb);
1696         for (i = 0; i < nritems; i++) {
1697                 btrfs_item_key_to_cpu(eb, &key, i);
1698
1699                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1700                         continue;
1701                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1702                         continue;
1703
1704                 if (active_node->current == NULL ||
1705                     active_node->current->ino < key.objectid) {
1706                         if (active_node->current) {
1707                                 active_node->current->checked = 1;
1708                                 maybe_free_inode_rec(inode_cache,
1709                                                      active_node->current);
1710                         }
1711                         active_node->current = get_inode_rec(inode_cache,
1712                                                              key.objectid, 1);
1713                         BUG_ON(IS_ERR(active_node->current));
1714                 }
1715                 switch (key.type) {
1716                 case BTRFS_DIR_ITEM_KEY:
1717                 case BTRFS_DIR_INDEX_KEY:
1718                         ret = process_dir_item(root, eb, i, &key, active_node);
1719                         break;
1720                 case BTRFS_INODE_REF_KEY:
1721                         ret = process_inode_ref(eb, i, &key, active_node);
1722                         break;
1723                 case BTRFS_INODE_EXTREF_KEY:
1724                         ret = process_inode_extref(eb, i, &key, active_node);
1725                         break;
1726                 case BTRFS_INODE_ITEM_KEY:
1727                         ret = process_inode_item(eb, i, &key, active_node);
1728                         break;
1729                 case BTRFS_EXTENT_DATA_KEY:
1730                         ret = process_file_extent(root, eb, i, &key,
1731                                                   active_node);
1732                         break;
1733                 default:
1734                         break;
1735                 };
1736         }
1737         return ret;
1738 }
1739
1740 static void reada_walk_down(struct btrfs_root *root,
1741                             struct extent_buffer *node, int slot)
1742 {
1743         u64 bytenr;
1744         u64 ptr_gen;
1745         u32 nritems;
1746         u32 blocksize;
1747         int i;
1748         int level;
1749
1750         level = btrfs_header_level(node);
1751         if (level != 1)
1752                 return;
1753
1754         nritems = btrfs_header_nritems(node);
1755         blocksize = btrfs_level_size(root, level - 1);
1756         for (i = slot; i < nritems; i++) {
1757                 bytenr = btrfs_node_blockptr(node, i);
1758                 ptr_gen = btrfs_node_ptr_generation(node, i);
1759                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1760         }
1761 }
1762
1763 /*
1764  * Check the child node/leaf by the following condition:
1765  * 1. the first item key of the node/leaf should be the same with the one
1766  *    in parent.
1767  * 2. block in parent node should match the child node/leaf.
1768  * 3. generation of parent node and child's header should be consistent.
1769  *
1770  * Or the child node/leaf pointed by the key in parent is not valid.
1771  *
1772  * We hope to check leaf owner too, but since subvol may share leaves,
1773  * which makes leaf owner check not so strong, key check should be
1774  * sufficient enough for that case.
1775  */
1776 static int check_child_node(struct btrfs_root *root,
1777                             struct extent_buffer *parent, int slot,
1778                             struct extent_buffer *child)
1779 {
1780         struct btrfs_key parent_key;
1781         struct btrfs_key child_key;
1782         int ret = 0;
1783
1784         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1785         if (btrfs_header_level(child) == 0)
1786                 btrfs_item_key_to_cpu(child, &child_key, 0);
1787         else
1788                 btrfs_node_key_to_cpu(child, &child_key, 0);
1789
1790         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1791                 ret = -EINVAL;
1792                 fprintf(stderr,
1793                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1794                         parent_key.objectid, parent_key.type, parent_key.offset,
1795                         child_key.objectid, child_key.type, child_key.offset);
1796         }
1797         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1798                 ret = -EINVAL;
1799                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1800                         btrfs_node_blockptr(parent, slot),
1801                         btrfs_header_bytenr(child));
1802         }
1803         if (btrfs_node_ptr_generation(parent, slot) !=
1804             btrfs_header_generation(child)) {
1805                 ret = -EINVAL;
1806                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1807                         btrfs_header_generation(child),
1808                         btrfs_node_ptr_generation(parent, slot));
1809         }
1810         return ret;
1811 }
1812
1813 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1814                           struct walk_control *wc, int *level)
1815 {
1816         enum btrfs_tree_block_status status;
1817         u64 bytenr;
1818         u64 ptr_gen;
1819         struct extent_buffer *next;
1820         struct extent_buffer *cur;
1821         u32 blocksize;
1822         int ret, err = 0;
1823         u64 refs;
1824
1825         WARN_ON(*level < 0);
1826         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1827         ret = btrfs_lookup_extent_info(NULL, root,
1828                                        path->nodes[*level]->start,
1829                                        *level, 1, &refs, NULL);
1830         if (ret < 0) {
1831                 err = ret;
1832                 goto out;
1833         }
1834
1835         if (refs > 1) {
1836                 ret = enter_shared_node(root, path->nodes[*level]->start,
1837                                         refs, wc, *level);
1838                 if (ret > 0) {
1839                         err = ret;
1840                         goto out;
1841                 }
1842         }
1843
1844         while (*level >= 0) {
1845                 WARN_ON(*level < 0);
1846                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1847                 cur = path->nodes[*level];
1848
1849                 if (btrfs_header_level(cur) != *level)
1850                         WARN_ON(1);
1851
1852                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1853                         break;
1854                 if (*level == 0) {
1855                         ret = process_one_leaf(root, cur, wc);
1856                         if (ret < 0)
1857                                 err = ret;
1858                         break;
1859                 }
1860                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1861                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1862                 blocksize = btrfs_level_size(root, *level - 1);
1863                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1864                                                1, &refs, NULL);
1865                 if (ret < 0)
1866                         refs = 0;
1867
1868                 if (refs > 1) {
1869                         ret = enter_shared_node(root, bytenr, refs,
1870                                                 wc, *level - 1);
1871                         if (ret > 0) {
1872                                 path->slots[*level]++;
1873                                 continue;
1874                         }
1875                 }
1876
1877                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1878                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1879                         free_extent_buffer(next);
1880                         reada_walk_down(root, cur, path->slots[*level]);
1881                         next = read_tree_block(root, bytenr, blocksize,
1882                                                ptr_gen);
1883                         if (!extent_buffer_uptodate(next)) {
1884                                 struct btrfs_key node_key;
1885
1886                                 btrfs_node_key_to_cpu(path->nodes[*level],
1887                                                       &node_key,
1888                                                       path->slots[*level]);
1889                                 btrfs_add_corrupt_extent_record(root->fs_info,
1890                                                 &node_key,
1891                                                 path->nodes[*level]->start,
1892                                                 root->leafsize, *level);
1893                                 err = -EIO;
1894                                 goto out;
1895                         }
1896                 }
1897
1898                 ret = check_child_node(root, cur, path->slots[*level], next);
1899                 if (ret) {
1900                         err = ret;
1901                         goto out;
1902                 }
1903
1904                 if (btrfs_is_leaf(next))
1905                         status = btrfs_check_leaf(root, NULL, next);
1906                 else
1907                         status = btrfs_check_node(root, NULL, next);
1908                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1909                         free_extent_buffer(next);
1910                         err = -EIO;
1911                         goto out;
1912                 }
1913
1914                 *level = *level - 1;
1915                 free_extent_buffer(path->nodes[*level]);
1916                 path->nodes[*level] = next;
1917                 path->slots[*level] = 0;
1918         }
1919 out:
1920         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1921         return err;
1922 }
1923
1924 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1925                         struct walk_control *wc, int *level)
1926 {
1927         int i;
1928         struct extent_buffer *leaf;
1929
1930         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1931                 leaf = path->nodes[i];
1932                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1933                         path->slots[i]++;
1934                         *level = i;
1935                         return 0;
1936                 } else {
1937                         free_extent_buffer(path->nodes[*level]);
1938                         path->nodes[*level] = NULL;
1939                         BUG_ON(*level > wc->active_node);
1940                         if (*level == wc->active_node)
1941                                 leave_shared_node(root, wc, *level);
1942                         *level = i + 1;
1943                 }
1944         }
1945         return 1;
1946 }
1947
1948 static int check_root_dir(struct inode_record *rec)
1949 {
1950         struct inode_backref *backref;
1951         int ret = -1;
1952
1953         if (!rec->found_inode_item || rec->errors)
1954                 goto out;
1955         if (rec->nlink != 1 || rec->found_link != 0)
1956                 goto out;
1957         if (list_empty(&rec->backrefs))
1958                 goto out;
1959         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1960         if (!backref->found_inode_ref)
1961                 goto out;
1962         if (backref->index != 0 || backref->namelen != 2 ||
1963             memcmp(backref->name, "..", 2))
1964                 goto out;
1965         if (backref->found_dir_index || backref->found_dir_item)
1966                 goto out;
1967         ret = 0;
1968 out:
1969         return ret;
1970 }
1971
1972 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1973                               struct btrfs_root *root, struct btrfs_path *path,
1974                               struct inode_record *rec)
1975 {
1976         struct btrfs_inode_item *ei;
1977         struct btrfs_key key;
1978         int ret;
1979
1980         key.objectid = rec->ino;
1981         key.type = BTRFS_INODE_ITEM_KEY;
1982         key.offset = (u64)-1;
1983
1984         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1985         if (ret < 0)
1986                 goto out;
1987         if (ret) {
1988                 if (!path->slots[0]) {
1989                         ret = -ENOENT;
1990                         goto out;
1991                 }
1992                 path->slots[0]--;
1993                 ret = 0;
1994         }
1995         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1996         if (key.objectid != rec->ino) {
1997                 ret = -ENOENT;
1998                 goto out;
1999         }
2000
2001         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2002                             struct btrfs_inode_item);
2003         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2004         btrfs_mark_buffer_dirty(path->nodes[0]);
2005         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2006         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2007                root->root_key.objectid);
2008 out:
2009         btrfs_release_path(path);
2010         return ret;
2011 }
2012
2013 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2014                                     struct btrfs_root *root,
2015                                     struct btrfs_path *path,
2016                                     struct inode_record *rec)
2017 {
2018         int ret;
2019
2020         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2021         btrfs_release_path(path);
2022         if (!ret)
2023                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2024         return ret;
2025 }
2026
2027 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2028                                struct btrfs_root *root,
2029                                struct btrfs_path *path,
2030                                struct inode_record *rec)
2031 {
2032         struct btrfs_inode_item *ei;
2033         struct btrfs_key key;
2034         int ret = 0;
2035
2036         key.objectid = rec->ino;
2037         key.type = BTRFS_INODE_ITEM_KEY;
2038         key.offset = 0;
2039
2040         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2041         if (ret) {
2042                 if (ret > 0)
2043                         ret = -ENOENT;
2044                 goto out;
2045         }
2046
2047         /* Since ret == 0, no need to check anything */
2048         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2049                             struct btrfs_inode_item);
2050         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2051         btrfs_mark_buffer_dirty(path->nodes[0]);
2052         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2053         printf("reset nbytes for ino %llu root %llu\n",
2054                rec->ino, root->root_key.objectid);
2055 out:
2056         btrfs_release_path(path);
2057         return ret;
2058 }
2059
2060 static int add_missing_dir_index(struct btrfs_root *root,
2061                                  struct cache_tree *inode_cache,
2062                                  struct inode_record *rec,
2063                                  struct inode_backref *backref)
2064 {
2065         struct btrfs_path *path;
2066         struct btrfs_trans_handle *trans;
2067         struct btrfs_dir_item *dir_item;
2068         struct extent_buffer *leaf;
2069         struct btrfs_key key;
2070         struct btrfs_disk_key disk_key;
2071         struct inode_record *dir_rec;
2072         unsigned long name_ptr;
2073         u32 data_size = sizeof(*dir_item) + backref->namelen;
2074         int ret;
2075
2076         path = btrfs_alloc_path();
2077         if (!path)
2078                 return -ENOMEM;
2079
2080         trans = btrfs_start_transaction(root, 1);
2081         if (IS_ERR(trans)) {
2082                 btrfs_free_path(path);
2083                 return PTR_ERR(trans);
2084         }
2085
2086         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2087                 (unsigned long long)rec->ino);
2088         key.objectid = backref->dir;
2089         key.type = BTRFS_DIR_INDEX_KEY;
2090         key.offset = backref->index;
2091
2092         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2093         BUG_ON(ret);
2094
2095         leaf = path->nodes[0];
2096         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2097
2098         disk_key.objectid = cpu_to_le64(rec->ino);
2099         disk_key.type = BTRFS_INODE_ITEM_KEY;
2100         disk_key.offset = 0;
2101
2102         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2103         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2104         btrfs_set_dir_data_len(leaf, dir_item, 0);
2105         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2106         name_ptr = (unsigned long)(dir_item + 1);
2107         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2108         btrfs_mark_buffer_dirty(leaf);
2109         btrfs_free_path(path);
2110         btrfs_commit_transaction(trans, root);
2111
2112         backref->found_dir_index = 1;
2113         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2114         BUG_ON(IS_ERR(dir_rec));
2115         if (!dir_rec)
2116                 return 0;
2117         dir_rec->found_size += backref->namelen;
2118         if (dir_rec->found_size == dir_rec->isize &&
2119             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2120                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2121         if (dir_rec->found_size != dir_rec->isize)
2122                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2123
2124         return 0;
2125 }
2126
2127 static int delete_dir_index(struct btrfs_root *root,
2128                             struct cache_tree *inode_cache,
2129                             struct inode_record *rec,
2130                             struct inode_backref *backref)
2131 {
2132         struct btrfs_trans_handle *trans;
2133         struct btrfs_dir_item *di;
2134         struct btrfs_path *path;
2135         int ret = 0;
2136
2137         path = btrfs_alloc_path();
2138         if (!path)
2139                 return -ENOMEM;
2140
2141         trans = btrfs_start_transaction(root, 1);
2142         if (IS_ERR(trans)) {
2143                 btrfs_free_path(path);
2144                 return PTR_ERR(trans);
2145         }
2146
2147
2148         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2149                 (unsigned long long)backref->dir,
2150                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2151                 (unsigned long long)root->objectid);
2152
2153         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2154                                     backref->name, backref->namelen,
2155                                     backref->index, -1);
2156         if (IS_ERR(di)) {
2157                 ret = PTR_ERR(di);
2158                 btrfs_free_path(path);
2159                 btrfs_commit_transaction(trans, root);
2160                 if (ret == -ENOENT)
2161                         return 0;
2162                 return ret;
2163         }
2164
2165         if (!di)
2166                 ret = btrfs_del_item(trans, root, path);
2167         else
2168                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2169         BUG_ON(ret);
2170         btrfs_free_path(path);
2171         btrfs_commit_transaction(trans, root);
2172         return ret;
2173 }
2174
2175 static int create_inode_item(struct btrfs_root *root,
2176                              struct inode_record *rec,
2177                              struct inode_backref *backref, int root_dir)
2178 {
2179         struct btrfs_trans_handle *trans;
2180         struct btrfs_inode_item inode_item;
2181         time_t now = time(NULL);
2182         int ret;
2183
2184         trans = btrfs_start_transaction(root, 1);
2185         if (IS_ERR(trans)) {
2186                 ret = PTR_ERR(trans);
2187                 return ret;
2188         }
2189
2190         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2191                 "be incomplete, please check permissions and content after "
2192                 "the fsck completes.\n", (unsigned long long)root->objectid,
2193                 (unsigned long long)rec->ino);
2194
2195         memset(&inode_item, 0, sizeof(inode_item));
2196         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2197         if (root_dir)
2198                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2199         else
2200                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2201         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2202         if (rec->found_dir_item) {
2203                 if (rec->found_file_extent)
2204                         fprintf(stderr, "root %llu inode %llu has both a dir "
2205                                 "item and extents, unsure if it is a dir or a "
2206                                 "regular file so setting it as a directory\n",
2207                                 (unsigned long long)root->objectid,
2208                                 (unsigned long long)rec->ino);
2209                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2210                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2211         } else if (!rec->found_dir_item) {
2212                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2213                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2214         }
2215         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2216         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2217         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2218         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2219         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2220         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2221         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2222         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2223
2224         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2225         BUG_ON(ret);
2226         btrfs_commit_transaction(trans, root);
2227         return 0;
2228 }
2229
2230 static int repair_inode_backrefs(struct btrfs_root *root,
2231                                  struct inode_record *rec,
2232                                  struct cache_tree *inode_cache,
2233                                  int delete)
2234 {
2235         struct inode_backref *tmp, *backref;
2236         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2237         int ret = 0;
2238         int repaired = 0;
2239
2240         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2241                 if (!delete && rec->ino == root_dirid) {
2242                         if (!rec->found_inode_item) {
2243                                 ret = create_inode_item(root, rec, backref, 1);
2244                                 if (ret)
2245                                         break;
2246                                 repaired++;
2247                         }
2248                 }
2249
2250                 /* Index 0 for root dir's are special, don't mess with it */
2251                 if (rec->ino == root_dirid && backref->index == 0)
2252                         continue;
2253
2254                 if (delete &&
2255                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2256                      (backref->found_dir_index && backref->found_inode_ref &&
2257                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2258                         ret = delete_dir_index(root, inode_cache, rec, backref);
2259                         if (ret)
2260                                 break;
2261                         repaired++;
2262                         list_del(&backref->list);
2263                         free(backref);
2264                 }
2265
2266                 if (!delete && !backref->found_dir_index &&
2267                     backref->found_dir_item && backref->found_inode_ref) {
2268                         ret = add_missing_dir_index(root, inode_cache, rec,
2269                                                     backref);
2270                         if (ret)
2271                                 break;
2272                         repaired++;
2273                         if (backref->found_dir_item &&
2274                             backref->found_dir_index &&
2275                             backref->found_dir_index) {
2276                                 if (!backref->errors &&
2277                                     backref->found_inode_ref) {
2278                                         list_del(&backref->list);
2279                                         free(backref);
2280                                 }
2281                         }
2282                 }
2283
2284                 if (!delete && (!backref->found_dir_index &&
2285                                 !backref->found_dir_item &&
2286                                 backref->found_inode_ref)) {
2287                         struct btrfs_trans_handle *trans;
2288                         struct btrfs_key location;
2289
2290                         ret = check_dir_conflict(root, backref->name,
2291                                                  backref->namelen,
2292                                                  backref->dir,
2293                                                  backref->index);
2294                         if (ret) {
2295                                 /*
2296                                  * let nlink fixing routine to handle it,
2297                                  * which can do it better.
2298                                  */
2299                                 ret = 0;
2300                                 break;
2301                         }
2302                         location.objectid = rec->ino;
2303                         location.type = BTRFS_INODE_ITEM_KEY;
2304                         location.offset = 0;
2305
2306                         trans = btrfs_start_transaction(root, 1);
2307                         if (IS_ERR(trans)) {
2308                                 ret = PTR_ERR(trans);
2309                                 break;
2310                         }
2311                         fprintf(stderr, "adding missing dir index/item pair "
2312                                 "for inode %llu\n",
2313                                 (unsigned long long)rec->ino);
2314                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2315                                                     backref->namelen,
2316                                                     backref->dir, &location,
2317                                                     imode_to_type(rec->imode),
2318                                                     backref->index);
2319                         BUG_ON(ret);
2320                         btrfs_commit_transaction(trans, root);
2321                         repaired++;
2322                 }
2323
2324                 if (!delete && (backref->found_inode_ref &&
2325                                 backref->found_dir_index &&
2326                                 backref->found_dir_item &&
2327                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2328                                 !rec->found_inode_item)) {
2329                         ret = create_inode_item(root, rec, backref, 0);
2330                         if (ret)
2331                                 break;
2332                         repaired++;
2333                 }
2334
2335         }
2336         return ret ? ret : repaired;
2337 }
2338
2339 /*
2340  * To determine the file type for nlink/inode_item repair
2341  *
2342  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2343  * Return -ENOENT if file type is not found.
2344  */
2345 static int find_file_type(struct inode_record *rec, u8 *type)
2346 {
2347         struct inode_backref *backref;
2348
2349         /* For inode item recovered case */
2350         if (rec->found_inode_item) {
2351                 *type = imode_to_type(rec->imode);
2352                 return 0;
2353         }
2354
2355         list_for_each_entry(backref, &rec->backrefs, list) {
2356                 if (backref->found_dir_index || backref->found_dir_item) {
2357                         *type = backref->filetype;
2358                         return 0;
2359                 }
2360         }
2361         return -ENOENT;
2362 }
2363
2364 /*
2365  * To determine the file name for nlink repair
2366  *
2367  * Return 0 if file name is found, set name and namelen.
2368  * Return -ENOENT if file name is not found.
2369  */
2370 static int find_file_name(struct inode_record *rec,
2371                           char *name, int *namelen)
2372 {
2373         struct inode_backref *backref;
2374
2375         list_for_each_entry(backref, &rec->backrefs, list) {
2376                 if (backref->found_dir_index || backref->found_dir_item ||
2377                     backref->found_inode_ref) {
2378                         memcpy(name, backref->name, backref->namelen);
2379                         *namelen = backref->namelen;
2380                         return 0;
2381                 }
2382         }
2383         return -ENOENT;
2384 }
2385
2386 /* Reset the nlink of the inode to the correct one */
2387 static int reset_nlink(struct btrfs_trans_handle *trans,
2388                        struct btrfs_root *root,
2389                        struct btrfs_path *path,
2390                        struct inode_record *rec)
2391 {
2392         struct inode_backref *backref;
2393         struct inode_backref *tmp;
2394         struct btrfs_key key;
2395         struct btrfs_inode_item *inode_item;
2396         int ret = 0;
2397
2398         /* We don't believe this either, reset it and iterate backref */
2399         rec->found_link = 0;
2400
2401         /* Remove all backref including the valid ones */
2402         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2403                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2404                                    backref->index, backref->name,
2405                                    backref->namelen, 0);
2406                 if (ret < 0)
2407                         goto out;
2408
2409                 /* remove invalid backref, so it won't be added back */
2410                 if (!(backref->found_dir_index &&
2411                       backref->found_dir_item &&
2412                       backref->found_inode_ref)) {
2413                         list_del(&backref->list);
2414                         free(backref);
2415                 } else {
2416                         rec->found_link++;
2417                 }
2418         }
2419
2420         /* Set nlink to 0 */
2421         key.objectid = rec->ino;
2422         key.type = BTRFS_INODE_ITEM_KEY;
2423         key.offset = 0;
2424         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2425         if (ret < 0)
2426                 goto out;
2427         if (ret > 0) {
2428                 ret = -ENOENT;
2429                 goto out;
2430         }
2431         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2432                                     struct btrfs_inode_item);
2433         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2434         btrfs_mark_buffer_dirty(path->nodes[0]);
2435         btrfs_release_path(path);
2436
2437         /*
2438          * Add back valid inode_ref/dir_item/dir_index,
2439          * add_link() will handle the nlink inc, so new nlink must be correct
2440          */
2441         list_for_each_entry(backref, &rec->backrefs, list) {
2442                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2443                                      backref->name, backref->namelen,
2444                                      backref->filetype, &backref->index, 1);
2445                 if (ret < 0)
2446                         goto out;
2447         }
2448 out:
2449         btrfs_release_path(path);
2450         return ret;
2451 }
2452
2453 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2454                                struct btrfs_root *root,
2455                                struct btrfs_path *path,
2456                                struct inode_record *rec)
2457 {
2458         char *dir_name = "lost+found";
2459         char namebuf[BTRFS_NAME_LEN] = {0};
2460         u64 lost_found_ino;
2461         u32 mode = 0700;
2462         u8 type = 0;
2463         int namelen = 0;
2464         int name_recovered = 0;
2465         int type_recovered = 0;
2466         int ret = 0;
2467
2468         /*
2469          * Get file name and type first before these invalid inode ref
2470          * are deleted by remove_all_invalid_backref()
2471          */
2472         name_recovered = !find_file_name(rec, namebuf, &namelen);
2473         type_recovered = !find_file_type(rec, &type);
2474
2475         if (!name_recovered) {
2476                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2477                        rec->ino, rec->ino);
2478                 namelen = count_digits(rec->ino);
2479                 sprintf(namebuf, "%llu", rec->ino);
2480                 name_recovered = 1;
2481         }
2482         if (!type_recovered) {
2483                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2484                        rec->ino);
2485                 type = BTRFS_FT_REG_FILE;
2486                 type_recovered = 1;
2487         }
2488
2489         ret = reset_nlink(trans, root, path, rec);
2490         if (ret < 0) {
2491                 fprintf(stderr,
2492                         "Failed to reset nlink for inode %llu: %s\n",
2493                         rec->ino, strerror(-ret));
2494                 goto out;
2495         }
2496
2497         if (rec->found_link == 0) {
2498                 lost_found_ino = root->highest_inode;
2499                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2500                         ret = -EOVERFLOW;
2501                         goto out;
2502                 }
2503                 lost_found_ino++;
2504                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2505                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2506                                   mode);
2507                 if (ret < 0) {
2508                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2509                                 dir_name, strerror(-ret));
2510                         goto out;
2511                 }
2512                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2513                                      namebuf, namelen, type, NULL, 1);
2514                 /*
2515                  * Add ".INO" suffix several times to handle case where
2516                  * "FILENAME.INO" is already taken by another file.
2517                  */
2518                 while (ret == -EEXIST) {
2519                         /*
2520                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2521                          */
2522                         if (namelen + count_digits(rec->ino) + 1 >
2523                             BTRFS_NAME_LEN) {
2524                                 ret = -EFBIG;
2525                                 goto out;
2526                         }
2527                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2528                                  ".%llu", rec->ino);
2529                         namelen += count_digits(rec->ino) + 1;
2530                         ret = btrfs_add_link(trans, root, rec->ino,
2531                                              lost_found_ino, namebuf,
2532                                              namelen, type, NULL, 1);
2533                 }
2534                 if (ret < 0) {
2535                         fprintf(stderr,
2536                                 "Failed to link the inode %llu to %s dir: %s\n",
2537                                 rec->ino, dir_name, strerror(-ret));
2538                         goto out;
2539                 }
2540                 /*
2541                  * Just increase the found_link, don't actually add the
2542                  * backref. This will make things easier and this inode
2543                  * record will be freed after the repair is done.
2544                  * So fsck will not report problem about this inode.
2545                  */
2546                 rec->found_link++;
2547                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2548                        namelen, namebuf, dir_name);
2549         }
2550         printf("Fixed the nlink of inode %llu\n", rec->ino);
2551 out:
2552         /*
2553          * Clear the flag anyway, or we will loop forever for the same inode
2554          * as it will not be removed from the bad inode list and the dead loop
2555          * happens.
2556          */
2557         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2558         btrfs_release_path(path);
2559         return ret;
2560 }
2561
2562 /*
2563  * Check if there is any normal(reg or prealloc) file extent for given
2564  * ino.
2565  * This is used to determine the file type when neither its dir_index/item or
2566  * inode_item exists.
2567  *
2568  * This will *NOT* report error, if any error happens, just consider it does
2569  * not have any normal file extent.
2570  */
2571 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2572 {
2573         struct btrfs_path *path;
2574         struct btrfs_key key;
2575         struct btrfs_key found_key;
2576         struct btrfs_file_extent_item *fi;
2577         u8 type;
2578         int ret = 0;
2579
2580         path = btrfs_alloc_path();
2581         if (!path)
2582                 goto out;
2583         key.objectid = ino;
2584         key.type = BTRFS_EXTENT_DATA_KEY;
2585         key.offset = 0;
2586
2587         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2588         if (ret < 0) {
2589                 ret = 0;
2590                 goto out;
2591         }
2592         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2593                 ret = btrfs_next_leaf(root, path);
2594                 if (ret) {
2595                         ret = 0;
2596                         goto out;
2597                 }
2598         }
2599         while (1) {
2600                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2601                                       path->slots[0]);
2602                 if (found_key.objectid != ino ||
2603                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2604                         break;
2605                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2606                                     struct btrfs_file_extent_item);
2607                 type = btrfs_file_extent_type(path->nodes[0], fi);
2608                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2609                         ret = 1;
2610                         goto out;
2611                 }
2612         }
2613 out:
2614         btrfs_free_path(path);
2615         return ret;
2616 }
2617
2618 static u32 btrfs_type_to_imode(u8 type)
2619 {
2620         static u32 imode_by_btrfs_type[] = {
2621                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2622                 [BTRFS_FT_DIR]          = S_IFDIR,
2623                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2624                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2625                 [BTRFS_FT_FIFO]         = S_IFIFO,
2626                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2627                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2628         };
2629
2630         return imode_by_btrfs_type[(type)];
2631 }
2632
2633 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2634                                 struct btrfs_root *root,
2635                                 struct btrfs_path *path,
2636                                 struct inode_record *rec)
2637 {
2638         u8 filetype;
2639         u32 mode = 0700;
2640         int type_recovered = 0;
2641         int ret = 0;
2642
2643         printf("Trying to rebuild inode:%llu\n", rec->ino);
2644
2645         type_recovered = !find_file_type(rec, &filetype);
2646
2647         /*
2648          * Try to determine inode type if type not found.
2649          *
2650          * For found regular file extent, it must be FILE.
2651          * For found dir_item/index, it must be DIR.
2652          *
2653          * For undetermined one, use FILE as fallback.
2654          *
2655          * TODO:
2656          * 1. If found backref(inode_index/item is already handled) to it,
2657          *    it must be DIR.
2658          *    Need new inode-inode ref structure to allow search for that.
2659          */
2660         if (!type_recovered) {
2661                 if (rec->found_file_extent &&
2662                     find_normal_file_extent(root, rec->ino)) {
2663                         type_recovered = 1;
2664                         filetype = BTRFS_FT_REG_FILE;
2665                 } else if (rec->found_dir_item) {
2666                         type_recovered = 1;
2667                         filetype = BTRFS_FT_DIR;
2668                 } else if (!list_empty(&rec->orphan_extents)) {
2669                         type_recovered = 1;
2670                         filetype = BTRFS_FT_REG_FILE;
2671                 } else{
2672                         printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2673                                rec->ino);
2674                         type_recovered = 1;
2675                         filetype = BTRFS_FT_REG_FILE;
2676                 }
2677         }
2678
2679         ret = btrfs_new_inode(trans, root, rec->ino,
2680                               mode | btrfs_type_to_imode(filetype));
2681         if (ret < 0)
2682                 goto out;
2683
2684         /*
2685          * Here inode rebuild is done, we only rebuild the inode item,
2686          * don't repair the nlink(like move to lost+found).
2687          * That is the job of nlink repair.
2688          *
2689          * We just fill the record and return
2690          */
2691         rec->found_dir_item = 1;
2692         rec->imode = mode | btrfs_type_to_imode(filetype);
2693         rec->nlink = 0;
2694         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2695         /* Ensure the inode_nlinks repair function will be called */
2696         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2697 out:
2698         return ret;
2699 }
2700
2701 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2702                                       struct btrfs_root *root,
2703                                       struct btrfs_path *path,
2704                                       struct inode_record *rec)
2705 {
2706         struct orphan_data_extent *orphan;
2707         struct orphan_data_extent *tmp;
2708         int ret = 0;
2709
2710         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2711                 /*
2712                  * Check for conflicting file extents
2713                  *
2714                  * Here we don't know whether the extents is compressed or not,
2715                  * so we can only assume it not compressed nor data offset,
2716                  * and use its disk_len as extent length.
2717                  */
2718                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2719                                        orphan->offset, orphan->disk_len, 0);
2720                 btrfs_release_path(path);
2721                 if (ret < 0)
2722                         goto out;
2723                 if (!ret) {
2724                         fprintf(stderr,
2725                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2726                                 orphan->disk_bytenr, orphan->disk_len);
2727                         ret = btrfs_free_extent(trans,
2728                                         root->fs_info->extent_root,
2729                                         orphan->disk_bytenr, orphan->disk_len,
2730                                         0, root->objectid, orphan->objectid,
2731                                         orphan->offset);
2732                         if (ret < 0)
2733                                 goto out;
2734                 }
2735                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2736                                 orphan->offset, orphan->disk_bytenr,
2737                                 orphan->disk_len, orphan->disk_len);
2738                 if (ret < 0)
2739                         goto out;
2740
2741                 /* Update file size info */
2742                 rec->found_size += orphan->disk_len;
2743                 if (rec->found_size == rec->nbytes)
2744                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2745
2746                 /* Update the file extent hole info too */
2747                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2748                                            orphan->disk_len);
2749                 if (ret < 0)
2750                         goto out;
2751                 if (RB_EMPTY_ROOT(&rec->holes))
2752                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2753
2754                 list_del(&orphan->list);
2755                 free(orphan);
2756         }
2757         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2758 out:
2759         return ret;
2760 }
2761
2762 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2763                                         struct btrfs_root *root,
2764                                         struct btrfs_path *path,
2765                                         struct inode_record *rec)
2766 {
2767         struct rb_node *node;
2768         struct file_extent_hole *hole;
2769         int found = 0;
2770         int ret = 0;
2771
2772         node = rb_first(&rec->holes);
2773
2774         while (node) {
2775                 found = 1;
2776                 hole = rb_entry(node, struct file_extent_hole, node);
2777                 ret = btrfs_punch_hole(trans, root, rec->ino,
2778                                        hole->start, hole->len);
2779                 if (ret < 0)
2780                         goto out;
2781                 ret = del_file_extent_hole(&rec->holes, hole->start,
2782                                            hole->len);
2783                 if (ret < 0)
2784                         goto out;
2785                 if (RB_EMPTY_ROOT(&rec->holes))
2786                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2787                 node = rb_first(&rec->holes);
2788         }
2789         /* special case for a file losing all its file extent */
2790         if (!found) {
2791                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2792                                        round_up(rec->isize, root->sectorsize));
2793                 if (ret < 0)
2794                         goto out;
2795         }
2796         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2797                rec->ino, root->objectid);
2798 out:
2799         return ret;
2800 }
2801
2802 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2803 {
2804         struct btrfs_trans_handle *trans;
2805         struct btrfs_path *path;
2806         int ret = 0;
2807
2808         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2809                              I_ERR_NO_ORPHAN_ITEM |
2810                              I_ERR_LINK_COUNT_WRONG |
2811                              I_ERR_NO_INODE_ITEM |
2812                              I_ERR_FILE_EXTENT_ORPHAN |
2813                              I_ERR_FILE_EXTENT_DISCOUNT|
2814                              I_ERR_FILE_NBYTES_WRONG)))
2815                 return rec->errors;
2816
2817         path = btrfs_alloc_path();
2818         if (!path)
2819                 return -ENOMEM;
2820
2821         /*
2822          * For nlink repair, it may create a dir and add link, so
2823          * 2 for parent(256)'s dir_index and dir_item
2824          * 2 for lost+found dir's inode_item and inode_ref
2825          * 1 for the new inode_ref of the file
2826          * 2 for lost+found dir's dir_index and dir_item for the file
2827          */
2828         trans = btrfs_start_transaction(root, 7);
2829         if (IS_ERR(trans)) {
2830                 btrfs_free_path(path);
2831                 return PTR_ERR(trans);
2832         }
2833
2834         if (rec->errors & I_ERR_NO_INODE_ITEM)
2835                 ret = repair_inode_no_item(trans, root, path, rec);
2836         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2837                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2838         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2839                 ret = repair_inode_discount_extent(trans, root, path, rec);
2840         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2841                 ret = repair_inode_isize(trans, root, path, rec);
2842         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2843                 ret = repair_inode_orphan_item(trans, root, path, rec);
2844         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2845                 ret = repair_inode_nlinks(trans, root, path, rec);
2846         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2847                 ret = repair_inode_nbytes(trans, root, path, rec);
2848         btrfs_commit_transaction(trans, root);
2849         btrfs_free_path(path);
2850         return ret;
2851 }
2852
2853 static int check_inode_recs(struct btrfs_root *root,
2854                             struct cache_tree *inode_cache)
2855 {
2856         struct cache_extent *cache;
2857         struct ptr_node *node;
2858         struct inode_record *rec;
2859         struct inode_backref *backref;
2860         int stage = 0;
2861         int ret = 0;
2862         int err = 0;
2863         u64 error = 0;
2864         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2865
2866         if (btrfs_root_refs(&root->root_item) == 0) {
2867                 if (!cache_tree_empty(inode_cache))
2868                         fprintf(stderr, "warning line %d\n", __LINE__);
2869                 return 0;
2870         }
2871
2872         /*
2873          * We need to record the highest inode number for later 'lost+found'
2874          * dir creation.
2875          * We must select a ino not used/refered by any existing inode, or
2876          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2877          * this may cause 'lost+found' dir has wrong nlinks.
2878          */
2879         cache = last_cache_extent(inode_cache);
2880         if (cache) {
2881                 node = container_of(cache, struct ptr_node, cache);
2882                 rec = node->data;
2883                 if (rec->ino > root->highest_inode)
2884                         root->highest_inode = rec->ino;
2885         }
2886
2887         /*
2888          * We need to repair backrefs first because we could change some of the
2889          * errors in the inode recs.
2890          *
2891          * We also need to go through and delete invalid backrefs first and then
2892          * add the correct ones second.  We do this because we may get EEXIST
2893          * when adding back the correct index because we hadn't yet deleted the
2894          * invalid index.
2895          *
2896          * For example, if we were missing a dir index then the directories
2897          * isize would be wrong, so if we fixed the isize to what we thought it
2898          * would be and then fixed the backref we'd still have a invalid fs, so
2899          * we need to add back the dir index and then check to see if the isize
2900          * is still wrong.
2901          */
2902         while (stage < 3) {
2903                 stage++;
2904                 if (stage == 3 && !err)
2905                         break;
2906
2907                 cache = search_cache_extent(inode_cache, 0);
2908                 while (repair && cache) {
2909                         node = container_of(cache, struct ptr_node, cache);
2910                         rec = node->data;
2911                         cache = next_cache_extent(cache);
2912
2913                         /* Need to free everything up and rescan */
2914                         if (stage == 3) {
2915                                 remove_cache_extent(inode_cache, &node->cache);
2916                                 free(node);
2917                                 free_inode_rec(rec);
2918                                 continue;
2919                         }
2920
2921                         if (list_empty(&rec->backrefs))
2922                                 continue;
2923
2924                         ret = repair_inode_backrefs(root, rec, inode_cache,
2925                                                     stage == 1);
2926                         if (ret < 0) {
2927                                 err = ret;
2928                                 stage = 2;
2929                                 break;
2930                         } if (ret > 0) {
2931                                 err = -EAGAIN;
2932                         }
2933                 }
2934         }
2935         if (err)
2936                 return err;
2937
2938         rec = get_inode_rec(inode_cache, root_dirid, 0);
2939         BUG_ON(IS_ERR(rec));
2940         if (rec) {
2941                 ret = check_root_dir(rec);
2942                 if (ret) {
2943                         fprintf(stderr, "root %llu root dir %llu error\n",
2944                                 (unsigned long long)root->root_key.objectid,
2945                                 (unsigned long long)root_dirid);
2946                         print_inode_error(root, rec);
2947                         error++;
2948                 }
2949         } else {
2950                 if (repair) {
2951                         struct btrfs_trans_handle *trans;
2952
2953                         trans = btrfs_start_transaction(root, 1);
2954                         if (IS_ERR(trans)) {
2955                                 err = PTR_ERR(trans);
2956                                 return err;
2957                         }
2958
2959                         fprintf(stderr,
2960                                 "root %llu missing its root dir, recreating\n",
2961                                 (unsigned long long)root->objectid);
2962
2963                         ret = btrfs_make_root_dir(trans, root, root_dirid);
2964                         BUG_ON(ret);
2965
2966                         btrfs_commit_transaction(trans, root);
2967                         return -EAGAIN;
2968                 }
2969
2970                 fprintf(stderr, "root %llu root dir %llu not found\n",
2971                         (unsigned long long)root->root_key.objectid,
2972                         (unsigned long long)root_dirid);
2973         }
2974
2975         while (1) {
2976                 cache = search_cache_extent(inode_cache, 0);
2977                 if (!cache)
2978                         break;
2979                 node = container_of(cache, struct ptr_node, cache);
2980                 rec = node->data;
2981                 remove_cache_extent(inode_cache, &node->cache);
2982                 free(node);
2983                 if (rec->ino == root_dirid ||
2984                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
2985                         free_inode_rec(rec);
2986                         continue;
2987                 }
2988
2989                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2990                         ret = check_orphan_item(root, rec->ino);
2991                         if (ret == 0)
2992                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2993                         if (can_free_inode_rec(rec)) {
2994                                 free_inode_rec(rec);
2995                                 continue;
2996                         }
2997                 }
2998
2999                 if (!rec->found_inode_item)
3000                         rec->errors |= I_ERR_NO_INODE_ITEM;
3001                 if (rec->found_link != rec->nlink)
3002                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3003                 if (repair) {
3004                         ret = try_repair_inode(root, rec);
3005                         if (ret == 0 && can_free_inode_rec(rec)) {
3006                                 free_inode_rec(rec);
3007                                 continue;
3008                         }
3009                         ret = 0;
3010                 }
3011
3012                 if (!(repair && ret == 0))
3013                         error++;
3014                 print_inode_error(root, rec);
3015                 list_for_each_entry(backref, &rec->backrefs, list) {
3016                         if (!backref->found_dir_item)
3017                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3018                         if (!backref->found_dir_index)
3019                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3020                         if (!backref->found_inode_ref)
3021                                 backref->errors |= REF_ERR_NO_INODE_REF;
3022                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3023                                 " namelen %u name %s filetype %d errors %x",
3024                                 (unsigned long long)backref->dir,
3025                                 (unsigned long long)backref->index,
3026                                 backref->namelen, backref->name,
3027                                 backref->filetype, backref->errors);
3028                         print_ref_error(backref->errors);
3029                 }
3030                 free_inode_rec(rec);
3031         }
3032         return (error > 0) ? -1 : 0;
3033 }
3034
3035 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3036                                         u64 objectid)
3037 {
3038         struct cache_extent *cache;
3039         struct root_record *rec = NULL;
3040         int ret;
3041
3042         cache = lookup_cache_extent(root_cache, objectid, 1);
3043         if (cache) {
3044                 rec = container_of(cache, struct root_record, cache);
3045         } else {
3046                 rec = calloc(1, sizeof(*rec));
3047                 if (!rec)
3048                         return ERR_PTR(-ENOMEM);
3049                 rec->objectid = objectid;
3050                 INIT_LIST_HEAD(&rec->backrefs);
3051                 rec->cache.start = objectid;
3052                 rec->cache.size = 1;
3053
3054                 ret = insert_cache_extent(root_cache, &rec->cache);
3055                 if (ret)
3056                         return ERR_PTR(-EEXIST);
3057         }
3058         return rec;
3059 }
3060
3061 static struct root_backref *get_root_backref(struct root_record *rec,
3062                                              u64 ref_root, u64 dir, u64 index,
3063                                              const char *name, int namelen)
3064 {
3065         struct root_backref *backref;
3066
3067         list_for_each_entry(backref, &rec->backrefs, list) {
3068                 if (backref->ref_root != ref_root || backref->dir != dir ||
3069                     backref->namelen != namelen)
3070                         continue;
3071                 if (memcmp(name, backref->name, namelen))
3072                         continue;
3073                 return backref;
3074         }
3075
3076         backref = calloc(1, sizeof(*backref) + namelen + 1);
3077         if (!backref)
3078                 return NULL;
3079         backref->ref_root = ref_root;
3080         backref->dir = dir;
3081         backref->index = index;
3082         backref->namelen = namelen;
3083         memcpy(backref->name, name, namelen);
3084         backref->name[namelen] = '\0';
3085         list_add_tail(&backref->list, &rec->backrefs);
3086         return backref;
3087 }
3088
3089 static void free_root_record(struct cache_extent *cache)
3090 {
3091         struct root_record *rec;
3092         struct root_backref *backref;
3093
3094         rec = container_of(cache, struct root_record, cache);
3095         while (!list_empty(&rec->backrefs)) {
3096                 backref = list_entry(rec->backrefs.next,
3097                                      struct root_backref, list);
3098                 list_del(&backref->list);
3099                 free(backref);
3100         }
3101
3102         kfree(rec);
3103 }
3104
3105 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3106
3107 static int add_root_backref(struct cache_tree *root_cache,
3108                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3109                             const char *name, int namelen,
3110                             int item_type, int errors)
3111 {
3112         struct root_record *rec;
3113         struct root_backref *backref;
3114
3115         rec = get_root_rec(root_cache, root_id);
3116         BUG_ON(IS_ERR(rec));
3117         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3118         BUG_ON(!backref);
3119
3120         backref->errors |= errors;
3121
3122         if (item_type != BTRFS_DIR_ITEM_KEY) {
3123                 if (backref->found_dir_index || backref->found_back_ref ||
3124                     backref->found_forward_ref) {
3125                         if (backref->index != index)
3126                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3127                 } else {
3128                         backref->index = index;
3129                 }
3130         }
3131
3132         if (item_type == BTRFS_DIR_ITEM_KEY) {
3133                 if (backref->found_forward_ref)
3134                         rec->found_ref++;
3135                 backref->found_dir_item = 1;
3136         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3137                 backref->found_dir_index = 1;
3138         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3139                 if (backref->found_forward_ref)
3140                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3141                 else if (backref->found_dir_item)
3142                         rec->found_ref++;
3143                 backref->found_forward_ref = 1;
3144         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3145                 if (backref->found_back_ref)
3146                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3147                 backref->found_back_ref = 1;
3148         } else {
3149                 BUG_ON(1);
3150         }
3151
3152         if (backref->found_forward_ref && backref->found_dir_item)
3153                 backref->reachable = 1;
3154         return 0;
3155 }
3156
3157 static int merge_root_recs(struct btrfs_root *root,
3158                            struct cache_tree *src_cache,
3159                            struct cache_tree *dst_cache)
3160 {
3161         struct cache_extent *cache;
3162         struct ptr_node *node;
3163         struct inode_record *rec;
3164         struct inode_backref *backref;
3165         int ret = 0;
3166
3167         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3168                 free_inode_recs_tree(src_cache);
3169                 return 0;
3170         }
3171
3172         while (1) {
3173                 cache = search_cache_extent(src_cache, 0);
3174                 if (!cache)
3175                         break;
3176                 node = container_of(cache, struct ptr_node, cache);
3177                 rec = node->data;
3178                 remove_cache_extent(src_cache, &node->cache);
3179                 free(node);
3180
3181                 ret = is_child_root(root, root->objectid, rec->ino);
3182                 if (ret < 0)
3183                         break;
3184                 else if (ret == 0)
3185                         goto skip;
3186
3187                 list_for_each_entry(backref, &rec->backrefs, list) {
3188                         BUG_ON(backref->found_inode_ref);
3189                         if (backref->found_dir_item)
3190                                 add_root_backref(dst_cache, rec->ino,
3191                                         root->root_key.objectid, backref->dir,
3192                                         backref->index, backref->name,
3193                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3194                                         backref->errors);
3195                         if (backref->found_dir_index)
3196                                 add_root_backref(dst_cache, rec->ino,
3197                                         root->root_key.objectid, backref->dir,
3198                                         backref->index, backref->name,
3199                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3200                                         backref->errors);
3201                 }
3202 skip:
3203                 free_inode_rec(rec);
3204         }
3205         if (ret < 0)
3206                 return ret;
3207         return 0;
3208 }
3209
3210 static int check_root_refs(struct btrfs_root *root,
3211                            struct cache_tree *root_cache)
3212 {
3213         struct root_record *rec;
3214         struct root_record *ref_root;
3215         struct root_backref *backref;
3216         struct cache_extent *cache;
3217         int loop = 1;
3218         int ret;
3219         int error;
3220         int errors = 0;
3221
3222         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3223         BUG_ON(IS_ERR(rec));
3224         rec->found_ref = 1;
3225
3226         /* fixme: this can not detect circular references */
3227         while (loop) {
3228                 loop = 0;
3229                 cache = search_cache_extent(root_cache, 0);
3230                 while (1) {
3231                         if (!cache)
3232                                 break;
3233                         rec = container_of(cache, struct root_record, cache);
3234                         cache = next_cache_extent(cache);
3235
3236                         if (rec->found_ref == 0)
3237                                 continue;
3238
3239                         list_for_each_entry(backref, &rec->backrefs, list) {
3240                                 if (!backref->reachable)
3241                                         continue;
3242
3243                                 ref_root = get_root_rec(root_cache,
3244                                                         backref->ref_root);
3245                                 BUG_ON(IS_ERR(ref_root));
3246                                 if (ref_root->found_ref > 0)
3247                                         continue;
3248
3249                                 backref->reachable = 0;
3250                                 rec->found_ref--;
3251                                 if (rec->found_ref == 0)
3252                                         loop = 1;
3253                         }
3254                 }
3255         }
3256
3257         cache = search_cache_extent(root_cache, 0);
3258         while (1) {
3259                 if (!cache)
3260                         break;
3261                 rec = container_of(cache, struct root_record, cache);
3262                 cache = next_cache_extent(cache);
3263
3264                 if (rec->found_ref == 0 &&
3265                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3266                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3267                         ret = check_orphan_item(root->fs_info->tree_root,
3268                                                 rec->objectid);
3269                         if (ret == 0)
3270                                 continue;
3271
3272                         /*
3273                          * If we don't have a root item then we likely just have
3274                          * a dir item in a snapshot for this root but no actual
3275                          * ref key or anything so it's meaningless.
3276                          */
3277                         if (!rec->found_root_item)
3278                                 continue;
3279                         errors++;
3280                         fprintf(stderr, "fs tree %llu not referenced\n",
3281                                 (unsigned long long)rec->objectid);
3282                 }
3283
3284                 error = 0;
3285                 if (rec->found_ref > 0 && !rec->found_root_item)
3286                         error = 1;
3287                 list_for_each_entry(backref, &rec->backrefs, list) {
3288                         if (!backref->found_dir_item)
3289                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3290                         if (!backref->found_dir_index)
3291                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3292                         if (!backref->found_back_ref)
3293                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3294                         if (!backref->found_forward_ref)
3295                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3296                         if (backref->reachable && backref->errors)
3297                                 error = 1;
3298                 }
3299                 if (!error)
3300                         continue;
3301
3302                 errors++;
3303                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3304                         (unsigned long long)rec->objectid, rec->found_ref,
3305                          rec->found_root_item ? "" : "not found");
3306
3307                 list_for_each_entry(backref, &rec->backrefs, list) {
3308                         if (!backref->reachable)
3309                                 continue;
3310                         if (!backref->errors && rec->found_root_item)
3311                                 continue;
3312                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3313                                 " index %llu namelen %u name %s errors %x\n",
3314                                 (unsigned long long)backref->ref_root,
3315                                 (unsigned long long)backref->dir,
3316                                 (unsigned long long)backref->index,
3317                                 backref->namelen, backref->name,
3318                                 backref->errors);
3319                         print_ref_error(backref->errors);
3320                 }
3321         }
3322         return errors > 0 ? 1 : 0;
3323 }
3324
3325 static int process_root_ref(struct extent_buffer *eb, int slot,
3326                             struct btrfs_key *key,
3327                             struct cache_tree *root_cache)
3328 {
3329         u64 dirid;
3330         u64 index;
3331         u32 len;
3332         u32 name_len;
3333         struct btrfs_root_ref *ref;
3334         char namebuf[BTRFS_NAME_LEN];
3335         int error;
3336
3337         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3338
3339         dirid = btrfs_root_ref_dirid(eb, ref);
3340         index = btrfs_root_ref_sequence(eb, ref);
3341         name_len = btrfs_root_ref_name_len(eb, ref);
3342
3343         if (name_len <= BTRFS_NAME_LEN) {
3344                 len = name_len;
3345                 error = 0;
3346         } else {
3347                 len = BTRFS_NAME_LEN;
3348                 error = REF_ERR_NAME_TOO_LONG;
3349         }
3350         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3351
3352         if (key->type == BTRFS_ROOT_REF_KEY) {
3353                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3354                                  index, namebuf, len, key->type, error);
3355         } else {
3356                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3357                                  index, namebuf, len, key->type, error);
3358         }
3359         return 0;
3360 }
3361
3362 static void free_corrupt_block(struct cache_extent *cache)
3363 {
3364         struct btrfs_corrupt_block *corrupt;
3365
3366         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3367         free(corrupt);
3368 }
3369
3370 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3371
3372 /*
3373  * Repair the btree of the given root.
3374  *
3375  * The fix is to remove the node key in corrupt_blocks cache_tree.
3376  * and rebalance the tree.
3377  * After the fix, the btree should be writeable.
3378  */
3379 static int repair_btree(struct btrfs_root *root,
3380                         struct cache_tree *corrupt_blocks)
3381 {
3382         struct btrfs_trans_handle *trans;
3383         struct btrfs_path *path;
3384         struct btrfs_corrupt_block *corrupt;
3385         struct cache_extent *cache;
3386         struct btrfs_key key;
3387         u64 offset;
3388         int level;
3389         int ret = 0;
3390
3391         if (cache_tree_empty(corrupt_blocks))
3392                 return 0;
3393
3394         path = btrfs_alloc_path();
3395         if (!path)
3396                 return -ENOMEM;
3397
3398         trans = btrfs_start_transaction(root, 1);
3399         if (IS_ERR(trans)) {
3400                 ret = PTR_ERR(trans);
3401                 fprintf(stderr, "Error starting transaction: %s\n",
3402                         strerror(-ret));
3403                 goto out_free_path;
3404         }
3405         cache = first_cache_extent(corrupt_blocks);
3406         while (cache) {
3407                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3408                                        cache);
3409                 level = corrupt->level;
3410                 path->lowest_level = level;
3411                 key.objectid = corrupt->key.objectid;
3412                 key.type = corrupt->key.type;
3413                 key.offset = corrupt->key.offset;
3414
3415                 /*
3416                  * Here we don't want to do any tree balance, since it may
3417                  * cause a balance with corrupted brother leaf/node,
3418                  * so ins_len set to 0 here.
3419                  * Balance will be done after all corrupt node/leaf is deleted.
3420                  */
3421                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3422                 if (ret < 0)
3423                         goto out;
3424                 offset = btrfs_node_blockptr(path->nodes[level],
3425                                              path->slots[level]);
3426
3427                 /* Remove the ptr */
3428                 ret = btrfs_del_ptr(trans, root, path, level,
3429                                     path->slots[level]);
3430                 if (ret < 0)
3431                         goto out;
3432                 /*
3433                  * Remove the corresponding extent
3434                  * return value is not concerned.
3435                  */
3436                 btrfs_release_path(path);
3437                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3438                                         0, root->root_key.objectid,
3439                                         level - 1, 0);
3440                 cache = next_cache_extent(cache);
3441         }
3442
3443         /* Balance the btree using btrfs_search_slot() */
3444         cache = first_cache_extent(corrupt_blocks);
3445         while (cache) {
3446                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3447                                        cache);
3448                 memcpy(&key, &corrupt->key, sizeof(key));
3449                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3450                 if (ret < 0)
3451                         goto out;
3452                 /* return will always >0 since it won't find the item */
3453                 ret = 0;
3454                 btrfs_release_path(path);
3455                 cache = next_cache_extent(cache);
3456         }
3457 out:
3458         btrfs_commit_transaction(trans, root);
3459 out_free_path:
3460         btrfs_free_path(path);
3461         return ret;
3462 }
3463
3464 static int check_fs_root(struct btrfs_root *root,
3465                          struct cache_tree *root_cache,
3466                          struct walk_control *wc)
3467 {
3468         int ret = 0;
3469         int err = 0;
3470         int wret;
3471         int level;
3472         struct btrfs_path path;
3473         struct shared_node root_node;
3474         struct root_record *rec;
3475         struct btrfs_root_item *root_item = &root->root_item;
3476         struct cache_tree corrupt_blocks;
3477         struct orphan_data_extent *orphan;
3478         struct orphan_data_extent *tmp;
3479         enum btrfs_tree_block_status status;
3480
3481         /*
3482          * Reuse the corrupt_block cache tree to record corrupted tree block
3483          *
3484          * Unlike the usage in extent tree check, here we do it in a per
3485          * fs/subvol tree base.
3486          */
3487         cache_tree_init(&corrupt_blocks);
3488         root->fs_info->corrupt_blocks = &corrupt_blocks;
3489
3490         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3491                 rec = get_root_rec(root_cache, root->root_key.objectid);
3492                 BUG_ON(IS_ERR(rec));
3493                 if (btrfs_root_refs(root_item) > 0)
3494                         rec->found_root_item = 1;
3495         }
3496
3497         btrfs_init_path(&path);
3498         memset(&root_node, 0, sizeof(root_node));
3499         cache_tree_init(&root_node.root_cache);
3500         cache_tree_init(&root_node.inode_cache);
3501
3502         /* Move the orphan extent record to corresponding inode_record */
3503         list_for_each_entry_safe(orphan, tmp,
3504                                  &root->orphan_data_extents, list) {
3505                 struct inode_record *inode;
3506
3507                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3508                                       1);
3509                 BUG_ON(IS_ERR(inode));
3510                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3511                 list_move(&orphan->list, &inode->orphan_extents);
3512         }
3513
3514         level = btrfs_header_level(root->node);
3515         memset(wc->nodes, 0, sizeof(wc->nodes));
3516         wc->nodes[level] = &root_node;
3517         wc->active_node = level;
3518         wc->root_level = level;
3519
3520         /* We may not have checked the root block, lets do that now */
3521         if (btrfs_is_leaf(root->node))
3522                 status = btrfs_check_leaf(root, NULL, root->node);
3523         else
3524                 status = btrfs_check_node(root, NULL, root->node);
3525         if (status != BTRFS_TREE_BLOCK_CLEAN)
3526                 return -EIO;
3527
3528         if (btrfs_root_refs(root_item) > 0 ||
3529             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3530                 path.nodes[level] = root->node;
3531                 extent_buffer_get(root->node);
3532                 path.slots[level] = 0;
3533         } else {
3534                 struct btrfs_key key;
3535                 struct btrfs_disk_key found_key;
3536
3537                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3538                 level = root_item->drop_level;
3539                 path.lowest_level = level;
3540                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3541                 if (wret < 0)
3542                         goto skip_walking;
3543                 btrfs_node_key(path.nodes[level], &found_key,
3544                                 path.slots[level]);
3545                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3546                                         sizeof(found_key)));
3547         }
3548
3549         while (1) {
3550                 wret = walk_down_tree(root, &path, wc, &level);
3551                 if (wret < 0)
3552                         ret = wret;
3553                 if (wret != 0)
3554                         break;
3555
3556                 wret = walk_up_tree(root, &path, wc, &level);
3557                 if (wret < 0)
3558                         ret = wret;
3559                 if (wret != 0)
3560                         break;
3561         }
3562 skip_walking:
3563         btrfs_release_path(&path);
3564
3565         if (!cache_tree_empty(&corrupt_blocks)) {
3566                 struct cache_extent *cache;
3567                 struct btrfs_corrupt_block *corrupt;
3568
3569                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3570                        root->root_key.objectid);
3571                 cache = first_cache_extent(&corrupt_blocks);
3572                 while (cache) {
3573                         corrupt = container_of(cache,
3574                                                struct btrfs_corrupt_block,
3575                                                cache);
3576                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3577                                cache->start, corrupt->level,
3578                                corrupt->key.objectid, corrupt->key.type,
3579                                corrupt->key.offset);
3580                         cache = next_cache_extent(cache);
3581                 }
3582                 if (repair) {
3583                         printf("Try to repair the btree for root %llu\n",
3584                                root->root_key.objectid);
3585                         ret = repair_btree(root, &corrupt_blocks);
3586                         if (ret < 0)
3587                                 fprintf(stderr, "Failed to repair btree: %s\n",
3588                                         strerror(-ret));
3589                         if (!ret)
3590                                 printf("Btree for root %llu is fixed\n",
3591                                        root->root_key.objectid);
3592                 }
3593         }
3594
3595         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3596         if (err < 0)
3597                 ret = err;
3598
3599         if (root_node.current) {
3600                 root_node.current->checked = 1;
3601                 maybe_free_inode_rec(&root_node.inode_cache,
3602                                 root_node.current);
3603         }
3604
3605         err = check_inode_recs(root, &root_node.inode_cache);
3606         if (!ret)
3607                 ret = err;
3608
3609         free_corrupt_blocks_tree(&corrupt_blocks);
3610         root->fs_info->corrupt_blocks = NULL;
3611         free_orphan_data_extents(&root->orphan_data_extents);
3612         return ret;
3613 }
3614
3615 static int fs_root_objectid(u64 objectid)
3616 {
3617         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3618             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3619                 return 1;
3620         return is_fstree(objectid);
3621 }
3622
3623 static int check_fs_roots(struct btrfs_root *root,
3624                           struct cache_tree *root_cache)
3625 {
3626         struct btrfs_path path;
3627         struct btrfs_key key;
3628         struct walk_control wc;
3629         struct extent_buffer *leaf, *tree_node;
3630         struct btrfs_root *tmp_root;
3631         struct btrfs_root *tree_root = root->fs_info->tree_root;
3632         int ret;
3633         int err = 0;
3634
3635         if (ctx.progress_enabled) {
3636                 ctx.tp = TASK_FS_ROOTS;
3637                 task_start(ctx.info);
3638         }
3639
3640         /*
3641          * Just in case we made any changes to the extent tree that weren't
3642          * reflected into the free space cache yet.
3643          */
3644         if (repair)
3645                 reset_cached_block_groups(root->fs_info);
3646         memset(&wc, 0, sizeof(wc));
3647         cache_tree_init(&wc.shared);
3648         btrfs_init_path(&path);
3649
3650 again:
3651         key.offset = 0;
3652         key.objectid = 0;
3653         key.type = BTRFS_ROOT_ITEM_KEY;
3654         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3655         if (ret < 0) {
3656                 err = 1;
3657                 goto out;
3658         }
3659         tree_node = tree_root->node;
3660         while (1) {
3661                 if (tree_node != tree_root->node) {
3662                         free_root_recs_tree(root_cache);
3663                         btrfs_release_path(&path);
3664                         goto again;
3665                 }
3666                 leaf = path.nodes[0];
3667                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3668                         ret = btrfs_next_leaf(tree_root, &path);
3669                         if (ret) {
3670                                 if (ret < 0)
3671                                         err = 1;
3672                                 break;
3673                         }
3674                         leaf = path.nodes[0];
3675                 }
3676                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3677                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3678                     fs_root_objectid(key.objectid)) {
3679                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3680                                 tmp_root = btrfs_read_fs_root_no_cache(
3681                                                 root->fs_info, &key);
3682                         } else {
3683                                 key.offset = (u64)-1;
3684                                 tmp_root = btrfs_read_fs_root(
3685                                                 root->fs_info, &key);
3686                         }
3687                         if (IS_ERR(tmp_root)) {
3688                                 err = 1;
3689                                 goto next;
3690                         }
3691                         ret = check_fs_root(tmp_root, root_cache, &wc);
3692                         if (ret == -EAGAIN) {
3693                                 free_root_recs_tree(root_cache);
3694                                 btrfs_release_path(&path);
3695                                 goto again;
3696                         }
3697                         if (ret)
3698                                 err = 1;
3699                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3700                                 btrfs_free_fs_root(tmp_root);
3701                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3702                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3703                         process_root_ref(leaf, path.slots[0], &key,
3704                                          root_cache);
3705                 }
3706 next:
3707                 path.slots[0]++;
3708         }
3709 out:
3710         btrfs_release_path(&path);
3711         if (err)
3712                 free_extent_cache_tree(&wc.shared);
3713         if (!cache_tree_empty(&wc.shared))
3714                 fprintf(stderr, "warning line %d\n", __LINE__);
3715
3716         task_stop(ctx.info);
3717
3718         return err;
3719 }
3720
3721 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3722 {
3723         struct list_head *cur = rec->backrefs.next;
3724         struct extent_backref *back;
3725         struct tree_backref *tback;
3726         struct data_backref *dback;
3727         u64 found = 0;
3728         int err = 0;
3729
3730         while(cur != &rec->backrefs) {
3731                 back = list_entry(cur, struct extent_backref, list);
3732                 cur = cur->next;
3733                 if (!back->found_extent_tree) {
3734                         err = 1;
3735                         if (!print_errs)
3736                                 goto out;
3737                         if (back->is_data) {
3738                                 dback = (struct data_backref *)back;
3739                                 fprintf(stderr, "Backref %llu %s %llu"
3740                                         " owner %llu offset %llu num_refs %lu"
3741                                         " not found in extent tree\n",
3742                                         (unsigned long long)rec->start,
3743                                         back->full_backref ?
3744                                         "parent" : "root",
3745                                         back->full_backref ?
3746                                         (unsigned long long)dback->parent:
3747                                         (unsigned long long)dback->root,
3748                                         (unsigned long long)dback->owner,
3749                                         (unsigned long long)dback->offset,
3750                                         (unsigned long)dback->num_refs);
3751                         } else {
3752                                 tback = (struct tree_backref *)back;
3753                                 fprintf(stderr, "Backref %llu parent %llu"
3754                                         " root %llu not found in extent tree\n",
3755                                         (unsigned long long)rec->start,
3756                                         (unsigned long long)tback->parent,
3757                                         (unsigned long long)tback->root);
3758                         }
3759                 }
3760                 if (!back->is_data && !back->found_ref) {
3761                         err = 1;
3762                         if (!print_errs)
3763                                 goto out;
3764                         tback = (struct tree_backref *)back;
3765                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3766                                 (unsigned long long)rec->start,
3767                                 back->full_backref ? "parent" : "root",
3768                                 back->full_backref ?
3769                                 (unsigned long long)tback->parent :
3770                                 (unsigned long long)tback->root, back);
3771                 }
3772                 if (back->is_data) {
3773                         dback = (struct data_backref *)back;
3774                         if (dback->found_ref != dback->num_refs) {
3775                                 err = 1;
3776                                 if (!print_errs)
3777                                         goto out;
3778                                 fprintf(stderr, "Incorrect local backref count"
3779                                         " on %llu %s %llu owner %llu"
3780                                         " offset %llu found %u wanted %u back %p\n",
3781                                         (unsigned long long)rec->start,
3782                                         back->full_backref ?
3783                                         "parent" : "root",
3784                                         back->full_backref ?
3785                                         (unsigned long long)dback->parent:
3786                                         (unsigned long long)dback->root,
3787                                         (unsigned long long)dback->owner,
3788                                         (unsigned long long)dback->offset,
3789                                         dback->found_ref, dback->num_refs, back);
3790                         }
3791                         if (dback->disk_bytenr != rec->start) {
3792                                 err = 1;
3793                                 if (!print_errs)
3794                                         goto out;
3795                                 fprintf(stderr, "Backref disk bytenr does not"
3796                                         " match extent record, bytenr=%llu, "
3797                                         "ref bytenr=%llu\n",
3798                                         (unsigned long long)rec->start,
3799                                         (unsigned long long)dback->disk_bytenr);
3800                         }
3801
3802                         if (dback->bytes != rec->nr) {
3803                                 err = 1;
3804                                 if (!print_errs)
3805                                         goto out;
3806                                 fprintf(stderr, "Backref bytes do not match "
3807                                         "extent backref, bytenr=%llu, ref "
3808                                         "bytes=%llu, backref bytes=%llu\n",
3809                                         (unsigned long long)rec->start,
3810                                         (unsigned long long)rec->nr,
3811                                         (unsigned long long)dback->bytes);
3812                         }
3813                 }
3814                 if (!back->is_data) {
3815                         found += 1;
3816                 } else {
3817                         dback = (struct data_backref *)back;
3818                         found += dback->found_ref;
3819                 }
3820         }
3821         if (found != rec->refs) {
3822                 err = 1;
3823                 if (!print_errs)
3824                         goto out;
3825                 fprintf(stderr, "Incorrect global backref count "
3826                         "on %llu found %llu wanted %llu\n",
3827                         (unsigned long long)rec->start,
3828                         (unsigned long long)found,
3829                         (unsigned long long)rec->refs);
3830         }
3831 out:
3832         return err;
3833 }
3834
3835 static int free_all_extent_backrefs(struct extent_record *rec)
3836 {
3837         struct extent_backref *back;
3838         struct list_head *cur;
3839         while (!list_empty(&rec->backrefs)) {
3840                 cur = rec->backrefs.next;
3841                 back = list_entry(cur, struct extent_backref, list);
3842                 list_del(cur);
3843                 free(back);
3844         }
3845         return 0;
3846 }
3847
3848 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3849                                      struct cache_tree *extent_cache)
3850 {
3851         struct cache_extent *cache;
3852         struct extent_record *rec;
3853
3854         while (1) {
3855                 cache = first_cache_extent(extent_cache);
3856                 if (!cache)
3857                         break;
3858                 rec = container_of(cache, struct extent_record, cache);
3859                 remove_cache_extent(extent_cache, cache);
3860                 free_all_extent_backrefs(rec);
3861                 free(rec);
3862         }
3863 }
3864
3865 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3866                                  struct extent_record *rec)
3867 {
3868         if (rec->content_checked && rec->owner_ref_checked &&
3869             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3870             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3871             !rec->bad_full_backref && !rec->crossing_stripes &&
3872             !rec->wrong_chunk_type) {
3873                 remove_cache_extent(extent_cache, &rec->cache);
3874                 free_all_extent_backrefs(rec);
3875                 list_del_init(&rec->list);
3876                 free(rec);
3877         }
3878         return 0;
3879 }
3880
3881 static int check_owner_ref(struct btrfs_root *root,
3882                             struct extent_record *rec,
3883                             struct extent_buffer *buf)
3884 {
3885         struct extent_backref *node;
3886         struct tree_backref *back;
3887         struct btrfs_root *ref_root;
3888         struct btrfs_key key;
3889         struct btrfs_path path;
3890         struct extent_buffer *parent;
3891         int level;
3892         int found = 0;
3893         int ret;
3894
3895         list_for_each_entry(node, &rec->backrefs, list) {
3896                 if (node->is_data)
3897                         continue;
3898                 if (!node->found_ref)
3899                         continue;
3900                 if (node->full_backref)
3901                         continue;
3902                 back = (struct tree_backref *)node;
3903                 if (btrfs_header_owner(buf) == back->root)
3904                         return 0;
3905         }
3906         BUG_ON(rec->is_root);
3907
3908         /* try to find the block by search corresponding fs tree */
3909         key.objectid = btrfs_header_owner(buf);
3910         key.type = BTRFS_ROOT_ITEM_KEY;
3911         key.offset = (u64)-1;
3912
3913         ref_root = btrfs_read_fs_root(root->fs_info, &key);
3914         if (IS_ERR(ref_root))
3915                 return 1;
3916
3917         level = btrfs_header_level(buf);
3918         if (level == 0)
3919                 btrfs_item_key_to_cpu(buf, &key, 0);
3920         else
3921                 btrfs_node_key_to_cpu(buf, &key, 0);
3922
3923         btrfs_init_path(&path);
3924         path.lowest_level = level + 1;
3925         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3926         if (ret < 0)
3927                 return 0;
3928
3929         parent = path.nodes[level + 1];
3930         if (parent && buf->start == btrfs_node_blockptr(parent,
3931                                                         path.slots[level + 1]))
3932                 found = 1;
3933
3934         btrfs_release_path(&path);
3935         return found ? 0 : 1;
3936 }
3937
3938 static int is_extent_tree_record(struct extent_record *rec)
3939 {
3940         struct list_head *cur = rec->backrefs.next;
3941         struct extent_backref *node;
3942         struct tree_backref *back;
3943         int is_extent = 0;
3944
3945         while(cur != &rec->backrefs) {
3946                 node = list_entry(cur, struct extent_backref, list);
3947                 cur = cur->next;
3948                 if (node->is_data)
3949                         return 0;
3950                 back = (struct tree_backref *)node;
3951                 if (node->full_backref)
3952                         return 0;
3953                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3954                         is_extent = 1;
3955         }
3956         return is_extent;
3957 }
3958
3959
3960 static int record_bad_block_io(struct btrfs_fs_info *info,
3961                                struct cache_tree *extent_cache,
3962                                u64 start, u64 len)
3963 {
3964         struct extent_record *rec;
3965         struct cache_extent *cache;
3966         struct btrfs_key key;
3967
3968         cache = lookup_cache_extent(extent_cache, start, len);
3969         if (!cache)
3970                 return 0;
3971
3972         rec = container_of(cache, struct extent_record, cache);
3973         if (!is_extent_tree_record(rec))
3974                 return 0;
3975
3976         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3977         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
3978 }
3979
3980 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3981                        struct extent_buffer *buf, int slot)
3982 {
3983         if (btrfs_header_level(buf)) {
3984                 struct btrfs_key_ptr ptr1, ptr2;
3985
3986                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3987                                    sizeof(struct btrfs_key_ptr));
3988                 read_extent_buffer(buf, &ptr2,
3989                                    btrfs_node_key_ptr_offset(slot + 1),
3990                                    sizeof(struct btrfs_key_ptr));
3991                 write_extent_buffer(buf, &ptr1,
3992                                     btrfs_node_key_ptr_offset(slot + 1),
3993                                     sizeof(struct btrfs_key_ptr));
3994                 write_extent_buffer(buf, &ptr2,
3995                                     btrfs_node_key_ptr_offset(slot),
3996                                     sizeof(struct btrfs_key_ptr));
3997                 if (slot == 0) {
3998                         struct btrfs_disk_key key;
3999                         btrfs_node_key(buf, &key, 0);
4000                         btrfs_fixup_low_keys(root, path, &key,
4001                                              btrfs_header_level(buf) + 1);
4002                 }
4003         } else {
4004                 struct btrfs_item *item1, *item2;
4005                 struct btrfs_key k1, k2;
4006                 char *item1_data, *item2_data;
4007                 u32 item1_offset, item2_offset, item1_size, item2_size;
4008
4009                 item1 = btrfs_item_nr(slot);
4010                 item2 = btrfs_item_nr(slot + 1);
4011                 btrfs_item_key_to_cpu(buf, &k1, slot);
4012                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4013                 item1_offset = btrfs_item_offset(buf, item1);
4014                 item2_offset = btrfs_item_offset(buf, item2);
4015                 item1_size = btrfs_item_size(buf, item1);
4016                 item2_size = btrfs_item_size(buf, item2);
4017
4018                 item1_data = malloc(item1_size);
4019                 if (!item1_data)
4020                         return -ENOMEM;
4021                 item2_data = malloc(item2_size);
4022                 if (!item2_data) {
4023                         free(item1_data);
4024                         return -ENOMEM;
4025                 }
4026
4027                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4028                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4029
4030                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4031                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4032                 free(item1_data);
4033                 free(item2_data);
4034
4035                 btrfs_set_item_offset(buf, item1, item2_offset);
4036                 btrfs_set_item_offset(buf, item2, item1_offset);
4037                 btrfs_set_item_size(buf, item1, item2_size);
4038                 btrfs_set_item_size(buf, item2, item1_size);
4039
4040                 path->slots[0] = slot;
4041                 btrfs_set_item_key_unsafe(root, path, &k2);
4042                 path->slots[0] = slot + 1;
4043                 btrfs_set_item_key_unsafe(root, path, &k1);
4044         }
4045         return 0;
4046 }
4047
4048 static int fix_key_order(struct btrfs_trans_handle *trans,
4049                          struct btrfs_root *root,
4050                          struct btrfs_path *path)
4051 {
4052         struct extent_buffer *buf;
4053         struct btrfs_key k1, k2;
4054         int i;
4055         int level = path->lowest_level;
4056         int ret = -EIO;
4057
4058         buf = path->nodes[level];
4059         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4060                 if (level) {
4061                         btrfs_node_key_to_cpu(buf, &k1, i);
4062                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4063                 } else {
4064                         btrfs_item_key_to_cpu(buf, &k1, i);
4065                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4066                 }
4067                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4068                         continue;
4069                 ret = swap_values(root, path, buf, i);
4070                 if (ret)
4071                         break;
4072                 btrfs_mark_buffer_dirty(buf);
4073                 i = 0;
4074         }
4075         return ret;
4076 }
4077
4078 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4079                              struct btrfs_root *root,
4080                              struct btrfs_path *path,
4081                              struct extent_buffer *buf, int slot)
4082 {
4083         struct btrfs_key key;
4084         int nritems = btrfs_header_nritems(buf);
4085
4086         btrfs_item_key_to_cpu(buf, &key, slot);
4087
4088         /* These are all the keys we can deal with missing. */
4089         if (key.type != BTRFS_DIR_INDEX_KEY &&
4090             key.type != BTRFS_EXTENT_ITEM_KEY &&
4091             key.type != BTRFS_METADATA_ITEM_KEY &&
4092             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4093             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4094                 return -1;
4095
4096         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4097                (unsigned long long)key.objectid, key.type,
4098                (unsigned long long)key.offset, slot, buf->start);
4099         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4100                               btrfs_item_nr_offset(slot + 1),
4101                               sizeof(struct btrfs_item) *
4102                               (nritems - slot - 1));
4103         btrfs_set_header_nritems(buf, nritems - 1);
4104         if (slot == 0) {
4105                 struct btrfs_disk_key disk_key;
4106
4107                 btrfs_item_key(buf, &disk_key, 0);
4108                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4109         }
4110         btrfs_mark_buffer_dirty(buf);
4111         return 0;
4112 }
4113
4114 static int fix_item_offset(struct btrfs_trans_handle *trans,
4115                            struct btrfs_root *root,
4116                            struct btrfs_path *path)
4117 {
4118         struct extent_buffer *buf;
4119         int i;
4120         int ret = 0;
4121
4122         /* We should only get this for leaves */
4123         BUG_ON(path->lowest_level);
4124         buf = path->nodes[0];
4125 again:
4126         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4127                 unsigned int shift = 0, offset;
4128
4129                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4130                     BTRFS_LEAF_DATA_SIZE(root)) {
4131                         if (btrfs_item_end_nr(buf, i) >
4132                             BTRFS_LEAF_DATA_SIZE(root)) {
4133                                 ret = delete_bogus_item(trans, root, path,
4134                                                         buf, i);
4135                                 if (!ret)
4136                                         goto again;
4137                                 fprintf(stderr, "item is off the end of the "
4138                                         "leaf, can't fix\n");
4139                                 ret = -EIO;
4140                                 break;
4141                         }
4142                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4143                                 btrfs_item_end_nr(buf, i);
4144                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4145                            btrfs_item_offset_nr(buf, i - 1)) {
4146                         if (btrfs_item_end_nr(buf, i) >
4147                             btrfs_item_offset_nr(buf, i - 1)) {
4148                                 ret = delete_bogus_item(trans, root, path,
4149                                                         buf, i);
4150                                 if (!ret)
4151                                         goto again;
4152                                 fprintf(stderr, "items overlap, can't fix\n");
4153                                 ret = -EIO;
4154                                 break;
4155                         }
4156                         shift = btrfs_item_offset_nr(buf, i - 1) -
4157                                 btrfs_item_end_nr(buf, i);
4158                 }
4159                 if (!shift)
4160                         continue;
4161
4162                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4163                        i, shift, (unsigned long long)buf->start);
4164                 offset = btrfs_item_offset_nr(buf, i);
4165                 memmove_extent_buffer(buf,
4166                                       btrfs_leaf_data(buf) + offset + shift,
4167                                       btrfs_leaf_data(buf) + offset,
4168                                       btrfs_item_size_nr(buf, i));
4169                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4170                                       offset + shift);
4171                 btrfs_mark_buffer_dirty(buf);
4172         }
4173
4174         /*
4175          * We may have moved things, in which case we want to exit so we don't
4176          * write those changes out.  Once we have proper abort functionality in
4177          * progs this can be changed to something nicer.
4178          */
4179         BUG_ON(ret);
4180         return ret;
4181 }
4182
4183 /*
4184  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4185  * then just return -EIO.
4186  */
4187 static int try_to_fix_bad_block(struct btrfs_root *root,
4188                                 struct extent_buffer *buf,
4189                                 enum btrfs_tree_block_status status)
4190 {
4191         struct btrfs_trans_handle *trans;
4192         struct ulist *roots;
4193         struct ulist_node *node;
4194         struct btrfs_root *search_root;
4195         struct btrfs_path *path;
4196         struct ulist_iterator iter;
4197         struct btrfs_key root_key, key;
4198         int ret;
4199
4200         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4201             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4202                 return -EIO;
4203
4204         path = btrfs_alloc_path();
4205         if (!path)
4206                 return -EIO;
4207
4208         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4209                                    0, &roots);
4210         if (ret) {
4211                 btrfs_free_path(path);
4212                 return -EIO;
4213         }
4214
4215         ULIST_ITER_INIT(&iter);
4216         while ((node = ulist_next(roots, &iter))) {
4217                 root_key.objectid = node->val;
4218                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4219                 root_key.offset = (u64)-1;
4220
4221                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4222                 if (IS_ERR(root)) {
4223                         ret = -EIO;
4224                         break;
4225                 }
4226
4227
4228                 trans = btrfs_start_transaction(search_root, 0);
4229                 if (IS_ERR(trans)) {
4230                         ret = PTR_ERR(trans);
4231                         break;
4232                 }
4233
4234                 path->lowest_level = btrfs_header_level(buf);
4235                 path->skip_check_block = 1;
4236                 if (path->lowest_level)
4237                         btrfs_node_key_to_cpu(buf, &key, 0);
4238                 else
4239                         btrfs_item_key_to_cpu(buf, &key, 0);
4240                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4241                 if (ret) {
4242                         ret = -EIO;
4243                         btrfs_commit_transaction(trans, search_root);
4244                         break;
4245                 }
4246                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4247                         ret = fix_key_order(trans, search_root, path);
4248                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4249                         ret = fix_item_offset(trans, search_root, path);
4250                 if (ret) {
4251                         btrfs_commit_transaction(trans, search_root);
4252                         break;
4253                 }
4254                 btrfs_release_path(path);
4255                 btrfs_commit_transaction(trans, search_root);
4256         }
4257         ulist_free(roots);
4258         btrfs_free_path(path);
4259         return ret;
4260 }
4261
4262 static int check_block(struct btrfs_root *root,
4263                        struct cache_tree *extent_cache,
4264                        struct extent_buffer *buf, u64 flags)
4265 {
4266         struct extent_record *rec;
4267         struct cache_extent *cache;
4268         struct btrfs_key key;
4269         enum btrfs_tree_block_status status;
4270         int ret = 0;
4271         int level;
4272
4273         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4274         if (!cache)
4275                 return 1;
4276         rec = container_of(cache, struct extent_record, cache);
4277         rec->generation = btrfs_header_generation(buf);
4278
4279         level = btrfs_header_level(buf);
4280         if (btrfs_header_nritems(buf) > 0) {
4281
4282                 if (level == 0)
4283                         btrfs_item_key_to_cpu(buf, &key, 0);
4284                 else
4285                         btrfs_node_key_to_cpu(buf, &key, 0);
4286
4287                 rec->info_objectid = key.objectid;
4288         }
4289         rec->info_level = level;
4290
4291         if (btrfs_is_leaf(buf))
4292                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4293         else
4294                 status = btrfs_check_node(root, &rec->parent_key, buf);
4295
4296         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4297                 if (repair)
4298                         status = try_to_fix_bad_block(root, buf, status);
4299                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4300                         ret = -EIO;
4301                         fprintf(stderr, "bad block %llu\n",
4302                                 (unsigned long long)buf->start);
4303                 } else {
4304                         /*
4305                          * Signal to callers we need to start the scan over
4306                          * again since we'll have cow'ed blocks.
4307                          */
4308                         ret = -EAGAIN;
4309                 }
4310         } else {
4311                 rec->content_checked = 1;
4312                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4313                         rec->owner_ref_checked = 1;
4314                 else {
4315                         ret = check_owner_ref(root, rec, buf);
4316                         if (!ret)
4317                                 rec->owner_ref_checked = 1;
4318                 }
4319         }
4320         if (!ret)
4321                 maybe_free_extent_rec(extent_cache, rec);
4322         return ret;
4323 }
4324
4325 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4326                                                 u64 parent, u64 root)
4327 {
4328         struct list_head *cur = rec->backrefs.next;
4329         struct extent_backref *node;
4330         struct tree_backref *back;
4331
4332         while(cur != &rec->backrefs) {
4333                 node = list_entry(cur, struct extent_backref, list);
4334                 cur = cur->next;
4335                 if (node->is_data)
4336                         continue;
4337                 back = (struct tree_backref *)node;
4338                 if (parent > 0) {
4339                         if (!node->full_backref)
4340                                 continue;
4341                         if (parent == back->parent)
4342                                 return back;
4343                 } else {
4344                         if (node->full_backref)
4345                                 continue;
4346                         if (back->root == root)
4347                                 return back;
4348                 }
4349         }
4350         return NULL;
4351 }
4352
4353 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4354                                                 u64 parent, u64 root)
4355 {
4356         struct tree_backref *ref = malloc(sizeof(*ref));
4357         memset(&ref->node, 0, sizeof(ref->node));
4358         if (parent > 0) {
4359                 ref->parent = parent;
4360                 ref->node.full_backref = 1;
4361         } else {
4362                 ref->root = root;
4363                 ref->node.full_backref = 0;
4364         }
4365         list_add_tail(&ref->node.list, &rec->backrefs);
4366
4367         return ref;
4368 }
4369
4370 static struct data_backref *find_data_backref(struct extent_record *rec,
4371                                                 u64 parent, u64 root,
4372                                                 u64 owner, u64 offset,
4373                                                 int found_ref,
4374                                                 u64 disk_bytenr, u64 bytes)
4375 {
4376         struct list_head *cur = rec->backrefs.next;
4377         struct extent_backref *node;
4378         struct data_backref *back;
4379
4380         while(cur != &rec->backrefs) {
4381                 node = list_entry(cur, struct extent_backref, list);
4382                 cur = cur->next;
4383                 if (!node->is_data)
4384                         continue;
4385                 back = (struct data_backref *)node;
4386                 if (parent > 0) {
4387                         if (!node->full_backref)
4388                                 continue;
4389                         if (parent == back->parent)
4390                                 return back;
4391                 } else {
4392                         if (node->full_backref)
4393                                 continue;
4394                         if (back->root == root && back->owner == owner &&
4395                             back->offset == offset) {
4396                                 if (found_ref && node->found_ref &&
4397                                     (back->bytes != bytes ||
4398                                     back->disk_bytenr != disk_bytenr))
4399                                         continue;
4400                                 return back;
4401                         }
4402                 }
4403         }
4404         return NULL;
4405 }
4406
4407 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4408                                                 u64 parent, u64 root,
4409                                                 u64 owner, u64 offset,
4410                                                 u64 max_size)
4411 {
4412         struct data_backref *ref = malloc(sizeof(*ref));
4413         memset(&ref->node, 0, sizeof(ref->node));
4414         ref->node.is_data = 1;
4415
4416         if (parent > 0) {
4417                 ref->parent = parent;
4418                 ref->owner = 0;
4419                 ref->offset = 0;
4420                 ref->node.full_backref = 1;
4421         } else {
4422                 ref->root = root;
4423                 ref->owner = owner;
4424                 ref->offset = offset;
4425                 ref->node.full_backref = 0;
4426         }
4427         ref->bytes = max_size;
4428         ref->found_ref = 0;
4429         ref->num_refs = 0;
4430         list_add_tail(&ref->node.list, &rec->backrefs);
4431         if (max_size > rec->max_size)
4432                 rec->max_size = max_size;
4433         return ref;
4434 }
4435
4436 /* Check if the type of extent matches with its chunk */
4437 static void check_extent_type(struct extent_record *rec)
4438 {
4439         struct btrfs_block_group_cache *bg_cache;
4440
4441         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4442         if (!bg_cache)
4443                 return;
4444
4445         /* data extent, check chunk directly*/
4446         if (!rec->metadata) {
4447                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4448                         rec->wrong_chunk_type = 1;
4449                 return;
4450         }
4451
4452         /* metadata extent, check the obvious case first */
4453         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4454                                  BTRFS_BLOCK_GROUP_METADATA))) {
4455                 rec->wrong_chunk_type = 1;
4456                 return;
4457         }
4458
4459         /*
4460          * Check SYSTEM extent, as it's also marked as metadata, we can only
4461          * make sure it's a SYSTEM extent by its backref
4462          */
4463         if (!list_empty(&rec->backrefs)) {
4464                 struct extent_backref *node;
4465                 struct tree_backref *tback;
4466                 u64 bg_type;
4467
4468                 node = list_entry(rec->backrefs.next, struct extent_backref,
4469                                   list);
4470                 if (node->is_data) {
4471                         /* tree block shouldn't have data backref */
4472                         rec->wrong_chunk_type = 1;
4473                         return;
4474                 }
4475                 tback = container_of(node, struct tree_backref, node);
4476
4477                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4478                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4479                 else
4480                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4481                 if (!(bg_cache->flags & bg_type))
4482                         rec->wrong_chunk_type = 1;
4483         }
4484 }
4485
4486 static int add_extent_rec(struct cache_tree *extent_cache,
4487                           struct btrfs_key *parent_key, u64 parent_gen,
4488                           u64 start, u64 nr, u64 extent_item_refs,
4489                           int is_root, int inc_ref, int set_checked,
4490                           int metadata, int extent_rec, u64 max_size)
4491 {
4492         struct extent_record *rec;
4493         struct cache_extent *cache;
4494         int ret = 0;
4495         int dup = 0;
4496
4497         cache = lookup_cache_extent(extent_cache, start, nr);
4498         if (cache) {
4499                 rec = container_of(cache, struct extent_record, cache);
4500                 if (inc_ref)
4501                         rec->refs++;
4502                 if (rec->nr == 1)
4503                         rec->nr = max(nr, max_size);
4504
4505                 /*
4506                  * We need to make sure to reset nr to whatever the extent
4507                  * record says was the real size, this way we can compare it to
4508                  * the backrefs.
4509                  */
4510                 if (extent_rec) {
4511                         if (start != rec->start || rec->found_rec) {
4512                                 struct extent_record *tmp;
4513
4514                                 dup = 1;
4515                                 if (list_empty(&rec->list))
4516                                         list_add_tail(&rec->list,
4517                                                       &duplicate_extents);
4518
4519                                 /*
4520                                  * We have to do this song and dance in case we
4521                                  * find an extent record that falls inside of
4522                                  * our current extent record but does not have
4523                                  * the same objectid.
4524                                  */
4525                                 tmp = malloc(sizeof(*tmp));
4526                                 if (!tmp)
4527                                         return -ENOMEM;
4528                                 tmp->start = start;
4529                                 tmp->max_size = max_size;
4530                                 tmp->nr = nr;
4531                                 tmp->found_rec = 1;
4532                                 tmp->metadata = metadata;
4533                                 tmp->extent_item_refs = extent_item_refs;
4534                                 INIT_LIST_HEAD(&tmp->list);
4535                                 list_add_tail(&tmp->list, &rec->dups);
4536                                 rec->num_duplicates++;
4537                         } else {
4538                                 rec->nr = nr;
4539                                 rec->found_rec = 1;
4540                         }
4541                 }
4542
4543                 if (extent_item_refs && !dup) {
4544                         if (rec->extent_item_refs) {
4545                                 fprintf(stderr, "block %llu rec "
4546                                         "extent_item_refs %llu, passed %llu\n",
4547                                         (unsigned long long)start,
4548                                         (unsigned long long)
4549                                                         rec->extent_item_refs,
4550                                         (unsigned long long)extent_item_refs);
4551                         }
4552                         rec->extent_item_refs = extent_item_refs;
4553                 }
4554                 if (is_root)
4555                         rec->is_root = 1;
4556                 if (set_checked) {
4557                         rec->content_checked = 1;
4558                         rec->owner_ref_checked = 1;
4559                 }
4560
4561                 if (parent_key)
4562                         btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4563                 if (parent_gen)
4564                         rec->parent_generation = parent_gen;
4565
4566                 if (rec->max_size < max_size)
4567                         rec->max_size = max_size;
4568
4569                 /*
4570                  * A metadata extent can't cross stripe_len boundary, otherwise
4571                  * kernel scrub won't be able to handle it.
4572                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4573                  * it.
4574                  */
4575                 if (metadata && check_crossing_stripes(rec->start,
4576                                                        rec->max_size))
4577                                 rec->crossing_stripes = 1;
4578                 check_extent_type(rec);
4579                 maybe_free_extent_rec(extent_cache, rec);
4580                 return ret;
4581         }
4582         rec = malloc(sizeof(*rec));
4583         rec->start = start;
4584         rec->max_size = max_size;
4585         rec->nr = max(nr, max_size);
4586         rec->found_rec = !!extent_rec;
4587         rec->content_checked = 0;
4588         rec->owner_ref_checked = 0;
4589         rec->num_duplicates = 0;
4590         rec->metadata = metadata;
4591         rec->flag_block_full_backref = -1;
4592         rec->bad_full_backref = 0;
4593         rec->crossing_stripes = 0;
4594         rec->wrong_chunk_type = 0;
4595         INIT_LIST_HEAD(&rec->backrefs);
4596         INIT_LIST_HEAD(&rec->dups);
4597         INIT_LIST_HEAD(&rec->list);
4598
4599         if (is_root)
4600                 rec->is_root = 1;
4601         else
4602                 rec->is_root = 0;
4603
4604         if (inc_ref)
4605                 rec->refs = 1;
4606         else
4607                 rec->refs = 0;
4608
4609         if (extent_item_refs)
4610                 rec->extent_item_refs = extent_item_refs;
4611         else
4612                 rec->extent_item_refs = 0;
4613
4614         if (parent_key)
4615                 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4616         else
4617                 memset(&rec->parent_key, 0, sizeof(*parent_key));
4618
4619         if (parent_gen)
4620                 rec->parent_generation = parent_gen;
4621         else
4622                 rec->parent_generation = 0;
4623
4624         rec->cache.start = start;
4625         rec->cache.size = nr;
4626         ret = insert_cache_extent(extent_cache, &rec->cache);
4627         BUG_ON(ret);
4628         bytes_used += nr;
4629         if (set_checked) {
4630                 rec->content_checked = 1;
4631                 rec->owner_ref_checked = 1;
4632         }
4633
4634         if (metadata)
4635                 if (check_crossing_stripes(rec->start, rec->max_size))
4636                         rec->crossing_stripes = 1;
4637         check_extent_type(rec);
4638         return ret;
4639 }
4640
4641 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4642                             u64 parent, u64 root, int found_ref)
4643 {
4644         struct extent_record *rec;
4645         struct tree_backref *back;
4646         struct cache_extent *cache;
4647
4648         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4649         if (!cache) {
4650                 add_extent_rec(extent_cache, NULL, 0, bytenr,
4651                                1, 0, 0, 0, 0, 1, 0, 0);
4652                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4653                 if (!cache)
4654                         abort();
4655         }
4656
4657         rec = container_of(cache, struct extent_record, cache);
4658         if (rec->start != bytenr) {
4659                 abort();
4660         }
4661
4662         back = find_tree_backref(rec, parent, root);
4663         if (!back)
4664                 back = alloc_tree_backref(rec, parent, root);
4665
4666         if (found_ref) {
4667                 if (back->node.found_ref) {
4668                         fprintf(stderr, "Extent back ref already exists "
4669                                 "for %llu parent %llu root %llu \n",
4670                                 (unsigned long long)bytenr,
4671                                 (unsigned long long)parent,
4672                                 (unsigned long long)root);
4673                 }
4674                 back->node.found_ref = 1;
4675         } else {
4676                 if (back->node.found_extent_tree) {
4677                         fprintf(stderr, "Extent back ref already exists "
4678                                 "for %llu parent %llu root %llu \n",
4679                                 (unsigned long long)bytenr,
4680                                 (unsigned long long)parent,
4681                                 (unsigned long long)root);
4682                 }
4683                 back->node.found_extent_tree = 1;
4684         }
4685         check_extent_type(rec);
4686         maybe_free_extent_rec(extent_cache, rec);
4687         return 0;
4688 }
4689
4690 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4691                             u64 parent, u64 root, u64 owner, u64 offset,
4692                             u32 num_refs, int found_ref, u64 max_size)
4693 {
4694         struct extent_record *rec;
4695         struct data_backref *back;
4696         struct cache_extent *cache;
4697
4698         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4699         if (!cache) {
4700                 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
4701                                0, 0, max_size);
4702                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4703                 if (!cache)
4704                         abort();
4705         }
4706
4707         rec = container_of(cache, struct extent_record, cache);
4708         if (rec->max_size < max_size)
4709                 rec->max_size = max_size;
4710
4711         /*
4712          * If found_ref is set then max_size is the real size and must match the
4713          * existing refs.  So if we have already found a ref then we need to
4714          * make sure that this ref matches the existing one, otherwise we need
4715          * to add a new backref so we can notice that the backrefs don't match
4716          * and we need to figure out who is telling the truth.  This is to
4717          * account for that awful fsync bug I introduced where we'd end up with
4718          * a btrfs_file_extent_item that would have its length include multiple
4719          * prealloc extents or point inside of a prealloc extent.
4720          */
4721         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4722                                  bytenr, max_size);
4723         if (!back)
4724                 back = alloc_data_backref(rec, parent, root, owner, offset,
4725                                           max_size);
4726
4727         if (found_ref) {
4728                 BUG_ON(num_refs != 1);
4729                 if (back->node.found_ref)
4730                         BUG_ON(back->bytes != max_size);
4731                 back->node.found_ref = 1;
4732                 back->found_ref += 1;
4733                 back->bytes = max_size;
4734                 back->disk_bytenr = bytenr;
4735                 rec->refs += 1;
4736                 rec->content_checked = 1;
4737                 rec->owner_ref_checked = 1;
4738         } else {
4739                 if (back->node.found_extent_tree) {
4740                         fprintf(stderr, "Extent back ref already exists "
4741                                 "for %llu parent %llu root %llu "
4742                                 "owner %llu offset %llu num_refs %lu\n",
4743                                 (unsigned long long)bytenr,
4744                                 (unsigned long long)parent,
4745                                 (unsigned long long)root,
4746                                 (unsigned long long)owner,
4747                                 (unsigned long long)offset,
4748                                 (unsigned long)num_refs);
4749                 }
4750                 back->num_refs = num_refs;
4751                 back->node.found_extent_tree = 1;
4752         }
4753         maybe_free_extent_rec(extent_cache, rec);
4754         return 0;
4755 }
4756
4757 static int add_pending(struct cache_tree *pending,
4758                        struct cache_tree *seen, u64 bytenr, u32 size)
4759 {
4760         int ret;
4761         ret = add_cache_extent(seen, bytenr, size);
4762         if (ret)
4763                 return ret;
4764         add_cache_extent(pending, bytenr, size);
4765         return 0;
4766 }
4767
4768 static int pick_next_pending(struct cache_tree *pending,
4769                         struct cache_tree *reada,
4770                         struct cache_tree *nodes,
4771                         u64 last, struct block_info *bits, int bits_nr,
4772                         int *reada_bits)
4773 {
4774         unsigned long node_start = last;
4775         struct cache_extent *cache;
4776         int ret;
4777
4778         cache = search_cache_extent(reada, 0);
4779         if (cache) {
4780                 bits[0].start = cache->start;
4781                 bits[0].size = cache->size;
4782                 *reada_bits = 1;
4783                 return 1;
4784         }
4785         *reada_bits = 0;
4786         if (node_start > 32768)
4787                 node_start -= 32768;
4788
4789         cache = search_cache_extent(nodes, node_start);
4790         if (!cache)
4791                 cache = search_cache_extent(nodes, 0);
4792
4793         if (!cache) {
4794                  cache = search_cache_extent(pending, 0);
4795                  if (!cache)
4796                          return 0;
4797                  ret = 0;
4798                  do {
4799                          bits[ret].start = cache->start;
4800                          bits[ret].size = cache->size;
4801                          cache = next_cache_extent(cache);
4802                          ret++;
4803                  } while (cache && ret < bits_nr);
4804                  return ret;
4805         }
4806
4807         ret = 0;
4808         do {
4809                 bits[ret].start = cache->start;
4810                 bits[ret].size = cache->size;
4811                 cache = next_cache_extent(cache);
4812                 ret++;
4813         } while (cache && ret < bits_nr);
4814
4815         if (bits_nr - ret > 8) {
4816                 u64 lookup = bits[0].start + bits[0].size;
4817                 struct cache_extent *next;
4818                 next = search_cache_extent(pending, lookup);
4819                 while(next) {
4820                         if (next->start - lookup > 32768)
4821                                 break;
4822                         bits[ret].start = next->start;
4823                         bits[ret].size = next->size;
4824                         lookup = next->start + next->size;
4825                         ret++;
4826                         if (ret == bits_nr)
4827                                 break;
4828                         next = next_cache_extent(next);
4829                         if (!next)
4830                                 break;
4831                 }
4832         }
4833         return ret;
4834 }
4835
4836 static void free_chunk_record(struct cache_extent *cache)
4837 {
4838         struct chunk_record *rec;
4839
4840         rec = container_of(cache, struct chunk_record, cache);
4841         list_del_init(&rec->list);
4842         list_del_init(&rec->dextents);
4843         free(rec);
4844 }
4845
4846 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4847 {
4848         cache_tree_free_extents(chunk_cache, free_chunk_record);
4849 }
4850
4851 static void free_device_record(struct rb_node *node)
4852 {
4853         struct device_record *rec;
4854
4855         rec = container_of(node, struct device_record, node);
4856         free(rec);
4857 }
4858
4859 FREE_RB_BASED_TREE(device_cache, free_device_record);
4860
4861 int insert_block_group_record(struct block_group_tree *tree,
4862                               struct block_group_record *bg_rec)
4863 {
4864         int ret;
4865
4866         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4867         if (ret)
4868                 return ret;
4869
4870         list_add_tail(&bg_rec->list, &tree->block_groups);
4871         return 0;
4872 }
4873
4874 static void free_block_group_record(struct cache_extent *cache)
4875 {
4876         struct block_group_record *rec;
4877
4878         rec = container_of(cache, struct block_group_record, cache);
4879         list_del_init(&rec->list);
4880         free(rec);
4881 }
4882
4883 void free_block_group_tree(struct block_group_tree *tree)
4884 {
4885         cache_tree_free_extents(&tree->tree, free_block_group_record);
4886 }
4887
4888 int insert_device_extent_record(struct device_extent_tree *tree,
4889                                 struct device_extent_record *de_rec)
4890 {
4891         int ret;
4892
4893         /*
4894          * Device extent is a bit different from the other extents, because
4895          * the extents which belong to the different devices may have the
4896          * same start and size, so we need use the special extent cache
4897          * search/insert functions.
4898          */
4899         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4900         if (ret)
4901                 return ret;
4902
4903         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4904         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4905         return 0;
4906 }
4907
4908 static void free_device_extent_record(struct cache_extent *cache)
4909 {
4910         struct device_extent_record *rec;
4911
4912         rec = container_of(cache, struct device_extent_record, cache);
4913         if (!list_empty(&rec->chunk_list))
4914                 list_del_init(&rec->chunk_list);
4915         if (!list_empty(&rec->device_list))
4916                 list_del_init(&rec->device_list);
4917         free(rec);
4918 }
4919
4920 void free_device_extent_tree(struct device_extent_tree *tree)
4921 {
4922         cache_tree_free_extents(&tree->tree, free_device_extent_record);
4923 }
4924
4925 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4926 static int process_extent_ref_v0(struct cache_tree *extent_cache,
4927                                  struct extent_buffer *leaf, int slot)
4928 {
4929         struct btrfs_extent_ref_v0 *ref0;
4930         struct btrfs_key key;
4931
4932         btrfs_item_key_to_cpu(leaf, &key, slot);
4933         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
4934         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
4935                 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
4936         } else {
4937                 add_data_backref(extent_cache, key.objectid, key.offset, 0,
4938                                  0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
4939         }
4940         return 0;
4941 }
4942 #endif
4943
4944 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4945                                             struct btrfs_key *key,
4946                                             int slot)
4947 {
4948         struct btrfs_chunk *ptr;
4949         struct chunk_record *rec;
4950         int num_stripes, i;
4951
4952         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4953         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4954
4955         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4956         if (!rec) {
4957                 fprintf(stderr, "memory allocation failed\n");
4958                 exit(-1);
4959         }
4960
4961         INIT_LIST_HEAD(&rec->list);
4962         INIT_LIST_HEAD(&rec->dextents);
4963         rec->bg_rec = NULL;
4964
4965         rec->cache.start = key->offset;
4966         rec->cache.size = btrfs_chunk_length(leaf, ptr);
4967
4968         rec->generation = btrfs_header_generation(leaf);
4969
4970         rec->objectid = key->objectid;
4971         rec->type = key->type;
4972         rec->offset = key->offset;
4973
4974         rec->length = rec->cache.size;
4975         rec->owner = btrfs_chunk_owner(leaf, ptr);
4976         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4977         rec->type_flags = btrfs_chunk_type(leaf, ptr);
4978         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4979         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4980         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4981         rec->num_stripes = num_stripes;
4982         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4983
4984         for (i = 0; i < rec->num_stripes; ++i) {
4985                 rec->stripes[i].devid =
4986                         btrfs_stripe_devid_nr(leaf, ptr, i);
4987                 rec->stripes[i].offset =
4988                         btrfs_stripe_offset_nr(leaf, ptr, i);
4989                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4990                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
4991                                 BTRFS_UUID_SIZE);
4992         }
4993
4994         return rec;
4995 }
4996
4997 static int process_chunk_item(struct cache_tree *chunk_cache,
4998                               struct btrfs_key *key, struct extent_buffer *eb,
4999                               int slot)
5000 {
5001         struct chunk_record *rec;
5002         int ret = 0;
5003
5004         rec = btrfs_new_chunk_record(eb, key, slot);
5005         ret = insert_cache_extent(chunk_cache, &rec->cache);
5006         if (ret) {
5007                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5008                         rec->offset, rec->length);
5009                 free(rec);
5010         }
5011
5012         return ret;
5013 }
5014
5015 static int process_device_item(struct rb_root *dev_cache,
5016                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5017 {
5018         struct btrfs_dev_item *ptr;
5019         struct device_record *rec;
5020         int ret = 0;
5021
5022         ptr = btrfs_item_ptr(eb,
5023                 slot, struct btrfs_dev_item);
5024
5025         rec = malloc(sizeof(*rec));
5026         if (!rec) {
5027                 fprintf(stderr, "memory allocation failed\n");
5028                 return -ENOMEM;
5029         }
5030
5031         rec->devid = key->offset;
5032         rec->generation = btrfs_header_generation(eb);
5033
5034         rec->objectid = key->objectid;
5035         rec->type = key->type;
5036         rec->offset = key->offset;
5037
5038         rec->devid = btrfs_device_id(eb, ptr);
5039         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5040         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5041
5042         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5043         if (ret) {
5044                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5045                 free(rec);
5046         }
5047
5048         return ret;
5049 }
5050
5051 struct block_group_record *
5052 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5053                              int slot)
5054 {
5055         struct btrfs_block_group_item *ptr;
5056         struct block_group_record *rec;
5057
5058         rec = calloc(1, sizeof(*rec));
5059         if (!rec) {
5060                 fprintf(stderr, "memory allocation failed\n");
5061                 exit(-1);
5062         }
5063
5064         rec->cache.start = key->objectid;
5065         rec->cache.size = key->offset;
5066
5067         rec->generation = btrfs_header_generation(leaf);
5068
5069         rec->objectid = key->objectid;
5070         rec->type = key->type;
5071         rec->offset = key->offset;
5072
5073         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5074         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5075
5076         INIT_LIST_HEAD(&rec->list);
5077
5078         return rec;
5079 }
5080
5081 static int process_block_group_item(struct block_group_tree *block_group_cache,
5082                                     struct btrfs_key *key,
5083                                     struct extent_buffer *eb, int slot)
5084 {
5085         struct block_group_record *rec;
5086         int ret = 0;
5087
5088         rec = btrfs_new_block_group_record(eb, key, slot);
5089         ret = insert_block_group_record(block_group_cache, rec);
5090         if (ret) {
5091                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5092                         rec->objectid, rec->offset);
5093                 free(rec);
5094         }
5095
5096         return ret;
5097 }
5098
5099 struct device_extent_record *
5100 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5101                                struct btrfs_key *key, int slot)
5102 {
5103         struct device_extent_record *rec;
5104         struct btrfs_dev_extent *ptr;
5105
5106         rec = calloc(1, sizeof(*rec));
5107         if (!rec) {
5108                 fprintf(stderr, "memory allocation failed\n");
5109                 exit(-1);
5110         }
5111
5112         rec->cache.objectid = key->objectid;
5113         rec->cache.start = key->offset;
5114
5115         rec->generation = btrfs_header_generation(leaf);
5116
5117         rec->objectid = key->objectid;
5118         rec->type = key->type;
5119         rec->offset = key->offset;
5120
5121         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5122         rec->chunk_objecteid =
5123                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5124         rec->chunk_offset =
5125                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5126         rec->length = btrfs_dev_extent_length(leaf, ptr);
5127         rec->cache.size = rec->length;
5128
5129         INIT_LIST_HEAD(&rec->chunk_list);
5130         INIT_LIST_HEAD(&rec->device_list);
5131
5132         return rec;
5133 }
5134
5135 static int
5136 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5137                            struct btrfs_key *key, struct extent_buffer *eb,
5138                            int slot)
5139 {
5140         struct device_extent_record *rec;
5141         int ret;
5142
5143         rec = btrfs_new_device_extent_record(eb, key, slot);
5144         ret = insert_device_extent_record(dev_extent_cache, rec);
5145         if (ret) {
5146                 fprintf(stderr,
5147                         "Device extent[%llu, %llu, %llu] existed.\n",
5148                         rec->objectid, rec->offset, rec->length);
5149                 free(rec);
5150         }
5151
5152         return ret;
5153 }
5154
5155 static int process_extent_item(struct btrfs_root *root,
5156                                struct cache_tree *extent_cache,
5157                                struct extent_buffer *eb, int slot)
5158 {
5159         struct btrfs_extent_item *ei;
5160         struct btrfs_extent_inline_ref *iref;
5161         struct btrfs_extent_data_ref *dref;
5162         struct btrfs_shared_data_ref *sref;
5163         struct btrfs_key key;
5164         unsigned long end;
5165         unsigned long ptr;
5166         int type;
5167         u32 item_size = btrfs_item_size_nr(eb, slot);
5168         u64 refs = 0;
5169         u64 offset;
5170         u64 num_bytes;
5171         int metadata = 0;
5172
5173         btrfs_item_key_to_cpu(eb, &key, slot);
5174
5175         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5176                 metadata = 1;
5177                 num_bytes = root->leafsize;
5178         } else {
5179                 num_bytes = key.offset;
5180         }
5181
5182         if (item_size < sizeof(*ei)) {
5183 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5184                 struct btrfs_extent_item_v0 *ei0;
5185                 BUG_ON(item_size != sizeof(*ei0));
5186                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5187                 refs = btrfs_extent_refs_v0(eb, ei0);
5188 #else
5189                 BUG();
5190 #endif
5191                 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5192                                       num_bytes, refs, 0, 0, 0, metadata, 1,
5193                                       num_bytes);
5194         }
5195
5196         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5197         refs = btrfs_extent_refs(eb, ei);
5198         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5199                 metadata = 1;
5200         else
5201                 metadata = 0;
5202
5203         add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5204                        refs, 0, 0, 0, metadata, 1, num_bytes);
5205
5206         ptr = (unsigned long)(ei + 1);
5207         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5208             key.type == BTRFS_EXTENT_ITEM_KEY)
5209                 ptr += sizeof(struct btrfs_tree_block_info);
5210
5211         end = (unsigned long)ei + item_size;
5212         while (ptr < end) {
5213                 iref = (struct btrfs_extent_inline_ref *)ptr;
5214                 type = btrfs_extent_inline_ref_type(eb, iref);
5215                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5216                 switch (type) {
5217                 case BTRFS_TREE_BLOCK_REF_KEY:
5218                         add_tree_backref(extent_cache, key.objectid,
5219                                          0, offset, 0);
5220                         break;
5221                 case BTRFS_SHARED_BLOCK_REF_KEY:
5222                         add_tree_backref(extent_cache, key.objectid,
5223                                          offset, 0, 0);
5224                         break;
5225                 case BTRFS_EXTENT_DATA_REF_KEY:
5226                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5227                         add_data_backref(extent_cache, key.objectid, 0,
5228                                         btrfs_extent_data_ref_root(eb, dref),
5229                                         btrfs_extent_data_ref_objectid(eb,
5230                                                                        dref),
5231                                         btrfs_extent_data_ref_offset(eb, dref),
5232                                         btrfs_extent_data_ref_count(eb, dref),
5233                                         0, num_bytes);
5234                         break;
5235                 case BTRFS_SHARED_DATA_REF_KEY:
5236                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5237                         add_data_backref(extent_cache, key.objectid, offset,
5238                                         0, 0, 0,
5239                                         btrfs_shared_data_ref_count(eb, sref),
5240                                         0, num_bytes);
5241                         break;
5242                 default:
5243                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5244                                 key.objectid, key.type, num_bytes);
5245                         goto out;
5246                 }
5247                 ptr += btrfs_extent_inline_ref_size(type);
5248         }
5249         WARN_ON(ptr > end);
5250 out:
5251         return 0;
5252 }
5253
5254 static int check_cache_range(struct btrfs_root *root,
5255                              struct btrfs_block_group_cache *cache,
5256                              u64 offset, u64 bytes)
5257 {
5258         struct btrfs_free_space *entry;
5259         u64 *logical;
5260         u64 bytenr;
5261         int stripe_len;
5262         int i, nr, ret;
5263
5264         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5265                 bytenr = btrfs_sb_offset(i);
5266                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5267                                        cache->key.objectid, bytenr, 0,
5268                                        &logical, &nr, &stripe_len);
5269                 if (ret)
5270                         return ret;
5271
5272                 while (nr--) {
5273                         if (logical[nr] + stripe_len <= offset)
5274                                 continue;
5275                         if (offset + bytes <= logical[nr])
5276                                 continue;
5277                         if (logical[nr] == offset) {
5278                                 if (stripe_len >= bytes) {
5279                                         kfree(logical);
5280                                         return 0;
5281                                 }
5282                                 bytes -= stripe_len;
5283                                 offset += stripe_len;
5284                         } else if (logical[nr] < offset) {
5285                                 if (logical[nr] + stripe_len >=
5286                                     offset + bytes) {
5287                                         kfree(logical);
5288                                         return 0;
5289                                 }
5290                                 bytes = (offset + bytes) -
5291                                         (logical[nr] + stripe_len);
5292                                 offset = logical[nr] + stripe_len;
5293                         } else {
5294                                 /*
5295                                  * Could be tricky, the super may land in the
5296                                  * middle of the area we're checking.  First
5297                                  * check the easiest case, it's at the end.
5298                                  */
5299                                 if (logical[nr] + stripe_len >=
5300                                     bytes + offset) {
5301                                         bytes = logical[nr] - offset;
5302                                         continue;
5303                                 }
5304
5305                                 /* Check the left side */
5306                                 ret = check_cache_range(root, cache,
5307                                                         offset,
5308                                                         logical[nr] - offset);
5309                                 if (ret) {
5310                                         kfree(logical);
5311                                         return ret;
5312                                 }
5313
5314                                 /* Now we continue with the right side */
5315                                 bytes = (offset + bytes) -
5316                                         (logical[nr] + stripe_len);
5317                                 offset = logical[nr] + stripe_len;
5318                         }
5319                 }
5320
5321                 kfree(logical);
5322         }
5323
5324         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5325         if (!entry) {
5326                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5327                         offset, offset+bytes);
5328                 return -EINVAL;
5329         }
5330
5331         if (entry->offset != offset) {
5332                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5333                         entry->offset);
5334                 return -EINVAL;
5335         }
5336
5337         if (entry->bytes != bytes) {
5338                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5339                         bytes, entry->bytes, offset);
5340                 return -EINVAL;
5341         }
5342
5343         unlink_free_space(cache->free_space_ctl, entry);
5344         free(entry);
5345         return 0;
5346 }
5347
5348 static int verify_space_cache(struct btrfs_root *root,
5349                               struct btrfs_block_group_cache *cache)
5350 {
5351         struct btrfs_path *path;
5352         struct extent_buffer *leaf;
5353         struct btrfs_key key;
5354         u64 last;
5355         int ret = 0;
5356
5357         path = btrfs_alloc_path();
5358         if (!path)
5359                 return -ENOMEM;
5360
5361         root = root->fs_info->extent_root;
5362
5363         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5364
5365         key.objectid = last;
5366         key.offset = 0;
5367         key.type = BTRFS_EXTENT_ITEM_KEY;
5368
5369         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5370         if (ret < 0)
5371                 goto out;
5372         ret = 0;
5373         while (1) {
5374                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5375                         ret = btrfs_next_leaf(root, path);
5376                         if (ret < 0)
5377                                 goto out;
5378                         if (ret > 0) {
5379                                 ret = 0;
5380                                 break;
5381                         }
5382                 }
5383                 leaf = path->nodes[0];
5384                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5385                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5386                         break;
5387                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5388                     key.type != BTRFS_METADATA_ITEM_KEY) {
5389                         path->slots[0]++;
5390                         continue;
5391                 }
5392
5393                 if (last == key.objectid) {
5394                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5395                                 last = key.objectid + key.offset;
5396                         else
5397                                 last = key.objectid + root->leafsize;
5398                         path->slots[0]++;
5399                         continue;
5400                 }
5401
5402                 ret = check_cache_range(root, cache, last,
5403                                         key.objectid - last);
5404                 if (ret)
5405                         break;
5406                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5407                         last = key.objectid + key.offset;
5408                 else
5409                         last = key.objectid + root->leafsize;
5410                 path->slots[0]++;
5411         }
5412
5413         if (last < cache->key.objectid + cache->key.offset)
5414                 ret = check_cache_range(root, cache, last,
5415                                         cache->key.objectid +
5416                                         cache->key.offset - last);
5417
5418 out:
5419         btrfs_free_path(path);
5420
5421         if (!ret &&
5422             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5423                 fprintf(stderr, "There are still entries left in the space "
5424                         "cache\n");
5425                 ret = -EINVAL;
5426         }
5427
5428         return ret;
5429 }
5430
5431 static int check_space_cache(struct btrfs_root *root)
5432 {
5433         struct btrfs_block_group_cache *cache;
5434         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5435         int ret;
5436         int error = 0;
5437
5438         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5439             btrfs_super_generation(root->fs_info->super_copy) !=
5440             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5441                 printf("cache and super generation don't match, space cache "
5442                        "will be invalidated\n");
5443                 return 0;
5444         }
5445
5446         if (ctx.progress_enabled) {
5447                 ctx.tp = TASK_FREE_SPACE;
5448                 task_start(ctx.info);
5449         }
5450
5451         while (1) {
5452                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5453                 if (!cache)
5454                         break;
5455
5456                 start = cache->key.objectid + cache->key.offset;
5457                 if (!cache->free_space_ctl) {
5458                         if (btrfs_init_free_space_ctl(cache,
5459                                                       root->sectorsize)) {
5460                                 ret = -ENOMEM;
5461                                 break;
5462                         }
5463                 } else {
5464                         btrfs_remove_free_space_cache(cache);
5465                 }
5466
5467                 ret = load_free_space_cache(root->fs_info, cache);
5468                 if (!ret)
5469                         continue;
5470
5471                 ret = verify_space_cache(root, cache);
5472                 if (ret) {
5473                         fprintf(stderr, "cache appears valid but isnt %Lu\n",
5474                                 cache->key.objectid);
5475                         error++;
5476                 }
5477         }
5478
5479         task_stop(ctx.info);
5480
5481         return error ? -EINVAL : 0;
5482 }
5483
5484 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5485                         u64 num_bytes, unsigned long leaf_offset,
5486                         struct extent_buffer *eb) {
5487
5488         u64 offset = 0;
5489         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5490         char *data;
5491         unsigned long csum_offset;
5492         u32 csum;
5493         u32 csum_expected;
5494         u64 read_len;
5495         u64 data_checked = 0;
5496         u64 tmp;
5497         int ret = 0;
5498         int mirror;
5499         int num_copies;
5500
5501         if (num_bytes % root->sectorsize)
5502                 return -EINVAL;
5503
5504         data = malloc(num_bytes);
5505         if (!data)
5506                 return -ENOMEM;
5507
5508         while (offset < num_bytes) {
5509                 mirror = 0;
5510 again:
5511                 read_len = num_bytes - offset;
5512                 /* read as much space once a time */
5513                 ret = read_extent_data(root, data + offset,
5514                                 bytenr + offset, &read_len, mirror);
5515                 if (ret)
5516                         goto out;
5517                 data_checked = 0;
5518                 /* verify every 4k data's checksum */
5519                 while (data_checked < read_len) {
5520                         csum = ~(u32)0;
5521                         tmp = offset + data_checked;
5522
5523                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5524                                                csum, root->sectorsize);
5525                         btrfs_csum_final(csum, (char *)&csum);
5526
5527                         csum_offset = leaf_offset +
5528                                  tmp / root->sectorsize * csum_size;
5529                         read_extent_buffer(eb, (char *)&csum_expected,
5530                                            csum_offset, csum_size);
5531                         /* try another mirror */
5532                         if (csum != csum_expected) {
5533                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5534                                                 mirror, bytenr + tmp,
5535                                                 csum, csum_expected);
5536                                 num_copies = btrfs_num_copies(
5537                                                 &root->fs_info->mapping_tree,
5538                                                 bytenr, num_bytes);
5539                                 if (mirror < num_copies - 1) {
5540                                         mirror += 1;
5541                                         goto again;
5542                                 }
5543                         }
5544                         data_checked += root->sectorsize;
5545                 }
5546                 offset += read_len;
5547         }
5548 out:
5549         free(data);
5550         return ret;
5551 }
5552
5553 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5554                                u64 num_bytes)
5555 {
5556         struct btrfs_path *path;
5557         struct extent_buffer *leaf;
5558         struct btrfs_key key;
5559         int ret;
5560
5561         path = btrfs_alloc_path();
5562         if (!path) {
5563                 fprintf(stderr, "Error allocing path\n");
5564                 return -ENOMEM;
5565         }
5566
5567         key.objectid = bytenr;
5568         key.type = BTRFS_EXTENT_ITEM_KEY;
5569         key.offset = (u64)-1;
5570
5571 again:
5572         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5573                                 0, 0);
5574         if (ret < 0) {
5575                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5576                 btrfs_free_path(path);
5577                 return ret;
5578         } else if (ret) {
5579                 if (path->slots[0] > 0) {
5580                         path->slots[0]--;
5581                 } else {
5582                         ret = btrfs_prev_leaf(root, path);
5583                         if (ret < 0) {
5584                                 goto out;
5585                         } else if (ret > 0) {
5586                                 ret = 0;
5587                                 goto out;
5588                         }
5589                 }
5590         }
5591
5592         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5593
5594         /*
5595          * Block group items come before extent items if they have the same
5596          * bytenr, so walk back one more just in case.  Dear future traveler,
5597          * first congrats on mastering time travel.  Now if it's not too much
5598          * trouble could you go back to 2006 and tell Chris to make the
5599          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5600          * EXTENT_ITEM_KEY please?
5601          */
5602         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5603                 if (path->slots[0] > 0) {
5604                         path->slots[0]--;
5605                 } else {
5606                         ret = btrfs_prev_leaf(root, path);
5607                         if (ret < 0) {
5608                                 goto out;
5609                         } else if (ret > 0) {
5610                                 ret = 0;
5611                                 goto out;
5612                         }
5613                 }
5614                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5615         }
5616
5617         while (num_bytes) {
5618                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5619                         ret = btrfs_next_leaf(root, path);
5620                         if (ret < 0) {
5621                                 fprintf(stderr, "Error going to next leaf "
5622                                         "%d\n", ret);
5623                                 btrfs_free_path(path);
5624                                 return ret;
5625                         } else if (ret) {
5626                                 break;
5627                         }
5628                 }
5629                 leaf = path->nodes[0];
5630                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5631                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5632                         path->slots[0]++;
5633                         continue;
5634                 }
5635                 if (key.objectid + key.offset < bytenr) {
5636                         path->slots[0]++;
5637                         continue;
5638                 }
5639                 if (key.objectid > bytenr + num_bytes)
5640                         break;
5641
5642                 if (key.objectid == bytenr) {
5643                         if (key.offset >= num_bytes) {
5644                                 num_bytes = 0;
5645                                 break;
5646                         }
5647                         num_bytes -= key.offset;
5648                         bytenr += key.offset;
5649                 } else if (key.objectid < bytenr) {
5650                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5651                                 num_bytes = 0;
5652                                 break;
5653                         }
5654                         num_bytes = (bytenr + num_bytes) -
5655                                 (key.objectid + key.offset);
5656                         bytenr = key.objectid + key.offset;
5657                 } else {
5658                         if (key.objectid + key.offset < bytenr + num_bytes) {
5659                                 u64 new_start = key.objectid + key.offset;
5660                                 u64 new_bytes = bytenr + num_bytes - new_start;
5661
5662                                 /*
5663                                  * Weird case, the extent is in the middle of
5664                                  * our range, we'll have to search one side
5665                                  * and then the other.  Not sure if this happens
5666                                  * in real life, but no harm in coding it up
5667                                  * anyway just in case.
5668                                  */
5669                                 btrfs_release_path(path);
5670                                 ret = check_extent_exists(root, new_start,
5671                                                           new_bytes);
5672                                 if (ret) {
5673                                         fprintf(stderr, "Right section didn't "
5674                                                 "have a record\n");
5675                                         break;
5676                                 }
5677                                 num_bytes = key.objectid - bytenr;
5678                                 goto again;
5679                         }
5680                         num_bytes = key.objectid - bytenr;
5681                 }
5682                 path->slots[0]++;
5683         }
5684         ret = 0;
5685
5686 out:
5687         if (num_bytes && !ret) {
5688                 fprintf(stderr, "There are no extents for csum range "
5689                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5690                 ret = 1;
5691         }
5692
5693         btrfs_free_path(path);
5694         return ret;
5695 }
5696
5697 static int check_csums(struct btrfs_root *root)
5698 {
5699         struct btrfs_path *path;
5700         struct extent_buffer *leaf;
5701         struct btrfs_key key;
5702         u64 offset = 0, num_bytes = 0;
5703         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5704         int errors = 0;
5705         int ret;
5706         u64 data_len;
5707         unsigned long leaf_offset;
5708
5709         root = root->fs_info->csum_root;
5710         if (!extent_buffer_uptodate(root->node)) {
5711                 fprintf(stderr, "No valid csum tree found\n");
5712                 return -ENOENT;
5713         }
5714
5715         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5716         key.type = BTRFS_EXTENT_CSUM_KEY;
5717         key.offset = 0;
5718
5719         path = btrfs_alloc_path();
5720         if (!path)
5721                 return -ENOMEM;
5722
5723         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5724         if (ret < 0) {
5725                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5726                 btrfs_free_path(path);
5727                 return ret;
5728         }
5729
5730         if (ret > 0 && path->slots[0])
5731                 path->slots[0]--;
5732         ret = 0;
5733
5734         while (1) {
5735                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5736                         ret = btrfs_next_leaf(root, path);
5737                         if (ret < 0) {
5738                                 fprintf(stderr, "Error going to next leaf "
5739                                         "%d\n", ret);
5740                                 break;
5741                         }
5742                         if (ret)
5743                                 break;
5744                 }
5745                 leaf = path->nodes[0];
5746
5747                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5748                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5749                         path->slots[0]++;
5750                         continue;
5751                 }
5752
5753                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5754                               csum_size) * root->sectorsize;
5755                 if (!check_data_csum)
5756                         goto skip_csum_check;
5757                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5758                 ret = check_extent_csums(root, key.offset, data_len,
5759                                          leaf_offset, leaf);
5760                 if (ret)
5761                         break;
5762 skip_csum_check:
5763                 if (!num_bytes) {
5764                         offset = key.offset;
5765                 } else if (key.offset != offset + num_bytes) {
5766                         ret = check_extent_exists(root, offset, num_bytes);
5767                         if (ret) {
5768                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5769                                         "there is no extent record\n",
5770                                         offset, offset+num_bytes);
5771                                 errors++;
5772                         }
5773                         offset = key.offset;
5774                         num_bytes = 0;
5775                 }
5776                 num_bytes += data_len;
5777                 path->slots[0]++;
5778         }
5779
5780         btrfs_free_path(path);
5781         return errors;
5782 }
5783
5784 static int is_dropped_key(struct btrfs_key *key,
5785                           struct btrfs_key *drop_key) {
5786         if (key->objectid < drop_key->objectid)
5787                 return 1;
5788         else if (key->objectid == drop_key->objectid) {
5789                 if (key->type < drop_key->type)
5790                         return 1;
5791                 else if (key->type == drop_key->type) {
5792                         if (key->offset < drop_key->offset)
5793                                 return 1;
5794                 }
5795         }
5796         return 0;
5797 }
5798
5799 /*
5800  * Here are the rules for FULL_BACKREF.
5801  *
5802  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5803  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5804  *      FULL_BACKREF set.
5805  * 3) We cow'ed the block walking down a reloc tree.  This is impossible to tell
5806  *    if it happened after the relocation occurred since we'll have dropped the
5807  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5808  *    have no real way to know for sure.
5809  *
5810  * We process the blocks one root at a time, and we start from the lowest root
5811  * objectid and go to the highest.  So we can just lookup the owner backref for
5812  * the record and if we don't find it then we know it doesn't exist and we have
5813  * a FULL BACKREF.
5814  *
5815  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5816  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5817  * be set or not and then we can check later once we've gathered all the refs.
5818  */
5819 static int calc_extent_flag(struct btrfs_root *root,
5820                            struct cache_tree *extent_cache,
5821                            struct extent_buffer *buf,
5822                            struct root_item_record *ri,
5823                            u64 *flags)
5824 {
5825         struct extent_record *rec;
5826         struct cache_extent *cache;
5827         struct tree_backref *tback;
5828         u64 owner = 0;
5829
5830         cache = lookup_cache_extent(extent_cache, buf->start, 1);
5831         /* we have added this extent before */
5832         BUG_ON(!cache);
5833         rec = container_of(cache, struct extent_record, cache);
5834
5835         /*
5836          * Except file/reloc tree, we can not have
5837          * FULL BACKREF MODE
5838          */
5839         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5840                 goto normal;
5841         /*
5842          * root node
5843          */
5844         if (buf->start == ri->bytenr)
5845                 goto normal;
5846
5847         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5848                 goto full_backref;
5849
5850         owner = btrfs_header_owner(buf);
5851         if (owner == ri->objectid)
5852                 goto normal;
5853
5854         tback = find_tree_backref(rec, 0, owner);
5855         if (!tback)
5856                 goto full_backref;
5857 normal:
5858         *flags = 0;
5859         if (rec->flag_block_full_backref != -1 &&
5860             rec->flag_block_full_backref != 0)
5861                 rec->bad_full_backref = 1;
5862         return 0;
5863 full_backref:
5864         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5865         if (rec->flag_block_full_backref != -1 &&
5866             rec->flag_block_full_backref != 1)
5867                 rec->bad_full_backref = 1;
5868         return 0;
5869 }
5870
5871 static int run_next_block(struct btrfs_root *root,
5872                           struct block_info *bits,
5873                           int bits_nr,
5874                           u64 *last,
5875                           struct cache_tree *pending,
5876                           struct cache_tree *seen,
5877                           struct cache_tree *reada,
5878                           struct cache_tree *nodes,
5879                           struct cache_tree *extent_cache,
5880                           struct cache_tree *chunk_cache,
5881                           struct rb_root *dev_cache,
5882                           struct block_group_tree *block_group_cache,
5883                           struct device_extent_tree *dev_extent_cache,
5884                           struct root_item_record *ri)
5885 {
5886         struct extent_buffer *buf;
5887         struct extent_record *rec = NULL;
5888         u64 bytenr;
5889         u32 size;
5890         u64 parent;
5891         u64 owner;
5892         u64 flags;
5893         u64 ptr;
5894         u64 gen = 0;
5895         int ret = 0;
5896         int i;
5897         int nritems;
5898         struct btrfs_key key;
5899         struct cache_extent *cache;
5900         int reada_bits;
5901
5902         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5903                                     bits_nr, &reada_bits);
5904         if (nritems == 0)
5905                 return 1;
5906
5907         if (!reada_bits) {
5908                 for(i = 0; i < nritems; i++) {
5909                         ret = add_cache_extent(reada, bits[i].start,
5910                                                bits[i].size);
5911                         if (ret == -EEXIST)
5912                                 continue;
5913
5914                         /* fixme, get the parent transid */
5915                         readahead_tree_block(root, bits[i].start,
5916                                              bits[i].size, 0);
5917                 }
5918         }
5919         *last = bits[0].start;
5920         bytenr = bits[0].start;
5921         size = bits[0].size;
5922
5923         cache = lookup_cache_extent(pending, bytenr, size);
5924         if (cache) {
5925                 remove_cache_extent(pending, cache);
5926                 free(cache);
5927         }
5928         cache = lookup_cache_extent(reada, bytenr, size);
5929         if (cache) {
5930                 remove_cache_extent(reada, cache);
5931                 free(cache);
5932         }
5933         cache = lookup_cache_extent(nodes, bytenr, size);
5934         if (cache) {
5935                 remove_cache_extent(nodes, cache);
5936                 free(cache);
5937         }
5938         cache = lookup_cache_extent(extent_cache, bytenr, size);
5939         if (cache) {
5940                 rec = container_of(cache, struct extent_record, cache);
5941                 gen = rec->parent_generation;
5942         }
5943
5944         /* fixme, get the real parent transid */
5945         buf = read_tree_block(root, bytenr, size, gen);
5946         if (!extent_buffer_uptodate(buf)) {
5947                 record_bad_block_io(root->fs_info,
5948                                     extent_cache, bytenr, size);
5949                 goto out;
5950         }
5951
5952         nritems = btrfs_header_nritems(buf);
5953
5954         flags = 0;
5955         if (!init_extent_tree) {
5956                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5957                                        btrfs_header_level(buf), 1, NULL,
5958                                        &flags);
5959                 if (ret < 0) {
5960                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5961                         if (ret < 0) {
5962                                 fprintf(stderr, "Couldn't calc extent flags\n");
5963                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5964                         }
5965                 }
5966         } else {
5967                 flags = 0;
5968                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5969                 if (ret < 0) {
5970                         fprintf(stderr, "Couldn't calc extent flags\n");
5971                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5972                 }
5973         }
5974
5975         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5976                 if (ri != NULL &&
5977                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5978                     ri->objectid == btrfs_header_owner(buf)) {
5979                         /*
5980                          * Ok we got to this block from it's original owner and
5981                          * we have FULL_BACKREF set.  Relocation can leave
5982                          * converted blocks over so this is altogether possible,
5983                          * however it's not possible if the generation > the
5984                          * last snapshot, so check for this case.
5985                          */
5986                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5987                             btrfs_header_generation(buf) > ri->last_snapshot) {
5988                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5989                                 rec->bad_full_backref = 1;
5990                         }
5991                 }
5992         } else {
5993                 if (ri != NULL &&
5994                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5995                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5996                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5997                         rec->bad_full_backref = 1;
5998                 }
5999         }
6000
6001         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6002                 rec->flag_block_full_backref = 1;
6003                 parent = bytenr;
6004                 owner = 0;
6005         } else {
6006                 rec->flag_block_full_backref = 0;
6007                 parent = 0;
6008                 owner = btrfs_header_owner(buf);
6009         }
6010
6011         ret = check_block(root, extent_cache, buf, flags);
6012         if (ret)
6013                 goto out;
6014
6015         if (btrfs_is_leaf(buf)) {
6016                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6017                 for (i = 0; i < nritems; i++) {
6018                         struct btrfs_file_extent_item *fi;
6019                         btrfs_item_key_to_cpu(buf, &key, i);
6020                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6021                                 process_extent_item(root, extent_cache, buf,
6022                                                     i);
6023                                 continue;
6024                         }
6025                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6026                                 process_extent_item(root, extent_cache, buf,
6027                                                     i);
6028                                 continue;
6029                         }
6030                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6031                                 total_csum_bytes +=
6032                                         btrfs_item_size_nr(buf, i);
6033                                 continue;
6034                         }
6035                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6036                                 process_chunk_item(chunk_cache, &key, buf, i);
6037                                 continue;
6038                         }
6039                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6040                                 process_device_item(dev_cache, &key, buf, i);
6041                                 continue;
6042                         }
6043                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6044                                 process_block_group_item(block_group_cache,
6045                                         &key, buf, i);
6046                                 continue;
6047                         }
6048                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6049                                 process_device_extent_item(dev_extent_cache,
6050                                         &key, buf, i);
6051                                 continue;
6052
6053                         }
6054                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6055 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6056                                 process_extent_ref_v0(extent_cache, buf, i);
6057 #else
6058                                 BUG();
6059 #endif
6060                                 continue;
6061                         }
6062
6063                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6064                                 add_tree_backref(extent_cache, key.objectid, 0,
6065                                                  key.offset, 0);
6066                                 continue;
6067                         }
6068                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6069                                 add_tree_backref(extent_cache, key.objectid,
6070                                                  key.offset, 0, 0);
6071                                 continue;
6072                         }
6073                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6074                                 struct btrfs_extent_data_ref *ref;
6075                                 ref = btrfs_item_ptr(buf, i,
6076                                                 struct btrfs_extent_data_ref);
6077                                 add_data_backref(extent_cache,
6078                                         key.objectid, 0,
6079                                         btrfs_extent_data_ref_root(buf, ref),
6080                                         btrfs_extent_data_ref_objectid(buf,
6081                                                                        ref),
6082                                         btrfs_extent_data_ref_offset(buf, ref),
6083                                         btrfs_extent_data_ref_count(buf, ref),
6084                                         0, root->sectorsize);
6085                                 continue;
6086                         }
6087                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6088                                 struct btrfs_shared_data_ref *ref;
6089                                 ref = btrfs_item_ptr(buf, i,
6090                                                 struct btrfs_shared_data_ref);
6091                                 add_data_backref(extent_cache,
6092                                         key.objectid, key.offset, 0, 0, 0,
6093                                         btrfs_shared_data_ref_count(buf, ref),
6094                                         0, root->sectorsize);
6095                                 continue;
6096                         }
6097                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6098                                 struct bad_item *bad;
6099
6100                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6101                                         continue;
6102                                 if (!owner)
6103                                         continue;
6104                                 bad = malloc(sizeof(struct bad_item));
6105                                 if (!bad)
6106                                         continue;
6107                                 INIT_LIST_HEAD(&bad->list);
6108                                 memcpy(&bad->key, &key,
6109                                        sizeof(struct btrfs_key));
6110                                 bad->root_id = owner;
6111                                 list_add_tail(&bad->list, &delete_items);
6112                                 continue;
6113                         }
6114                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6115                                 continue;
6116                         fi = btrfs_item_ptr(buf, i,
6117                                             struct btrfs_file_extent_item);
6118                         if (btrfs_file_extent_type(buf, fi) ==
6119                             BTRFS_FILE_EXTENT_INLINE)
6120                                 continue;
6121                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6122                                 continue;
6123
6124                         data_bytes_allocated +=
6125                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6126                         if (data_bytes_allocated < root->sectorsize) {
6127                                 abort();
6128                         }
6129                         data_bytes_referenced +=
6130                                 btrfs_file_extent_num_bytes(buf, fi);
6131                         add_data_backref(extent_cache,
6132                                 btrfs_file_extent_disk_bytenr(buf, fi),
6133                                 parent, owner, key.objectid, key.offset -
6134                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6135                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6136                 }
6137         } else {
6138                 int level;
6139                 struct btrfs_key first_key;
6140
6141                 first_key.objectid = 0;
6142
6143                 if (nritems > 0)
6144                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6145                 level = btrfs_header_level(buf);
6146                 for (i = 0; i < nritems; i++) {
6147                         ptr = btrfs_node_blockptr(buf, i);
6148                         size = btrfs_level_size(root, level - 1);
6149                         btrfs_node_key_to_cpu(buf, &key, i);
6150                         if (ri != NULL) {
6151                                 if ((level == ri->drop_level)
6152                                     && is_dropped_key(&key, &ri->drop_key)) {
6153                                         continue;
6154                                 }
6155                         }
6156                         ret = add_extent_rec(extent_cache, &key,
6157                                              btrfs_node_ptr_generation(buf, i),
6158                                              ptr, size, 0, 0, 1, 0, 1, 0,
6159                                              size);
6160                         BUG_ON(ret);
6161
6162                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6163
6164                         if (level > 1) {
6165                                 add_pending(nodes, seen, ptr, size);
6166                         } else {
6167                                 add_pending(pending, seen, ptr, size);
6168                         }
6169                 }
6170                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6171                                       nritems) * sizeof(struct btrfs_key_ptr);
6172         }
6173         total_btree_bytes += buf->len;
6174         if (fs_root_objectid(btrfs_header_owner(buf)))
6175                 total_fs_tree_bytes += buf->len;
6176         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6177                 total_extent_tree_bytes += buf->len;
6178         if (!found_old_backref &&
6179             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6180             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6181             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6182                 found_old_backref = 1;
6183 out:
6184         free_extent_buffer(buf);
6185         return ret;
6186 }
6187
6188 static int add_root_to_pending(struct extent_buffer *buf,
6189                                struct cache_tree *extent_cache,
6190                                struct cache_tree *pending,
6191                                struct cache_tree *seen,
6192                                struct cache_tree *nodes,
6193                                u64 objectid)
6194 {
6195         if (btrfs_header_level(buf) > 0)
6196                 add_pending(nodes, seen, buf->start, buf->len);
6197         else
6198                 add_pending(pending, seen, buf->start, buf->len);
6199         add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6200                        0, 1, 1, 0, 1, 0, buf->len);
6201
6202         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6203             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6204                 add_tree_backref(extent_cache, buf->start, buf->start,
6205                                  0, 1);
6206         else
6207                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6208         return 0;
6209 }
6210
6211 /* as we fix the tree, we might be deleting blocks that
6212  * we're tracking for repair.  This hook makes sure we
6213  * remove any backrefs for blocks as we are fixing them.
6214  */
6215 static int free_extent_hook(struct btrfs_trans_handle *trans,
6216                             struct btrfs_root *root,
6217                             u64 bytenr, u64 num_bytes, u64 parent,
6218                             u64 root_objectid, u64 owner, u64 offset,
6219                             int refs_to_drop)
6220 {
6221         struct extent_record *rec;
6222         struct cache_extent *cache;
6223         int is_data;
6224         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6225
6226         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6227         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6228         if (!cache)
6229                 return 0;
6230
6231         rec = container_of(cache, struct extent_record, cache);
6232         if (is_data) {
6233                 struct data_backref *back;
6234                 back = find_data_backref(rec, parent, root_objectid, owner,
6235                                          offset, 1, bytenr, num_bytes);
6236                 if (!back)
6237                         goto out;
6238                 if (back->node.found_ref) {
6239                         back->found_ref -= refs_to_drop;
6240                         if (rec->refs)
6241                                 rec->refs -= refs_to_drop;
6242                 }
6243                 if (back->node.found_extent_tree) {
6244                         back->num_refs -= refs_to_drop;
6245                         if (rec->extent_item_refs)
6246                                 rec->extent_item_refs -= refs_to_drop;
6247                 }
6248                 if (back->found_ref == 0)
6249                         back->node.found_ref = 0;
6250                 if (back->num_refs == 0)
6251                         back->node.found_extent_tree = 0;
6252
6253                 if (!back->node.found_extent_tree && back->node.found_ref) {
6254                         list_del(&back->node.list);
6255                         free(back);
6256                 }
6257         } else {
6258                 struct tree_backref *back;
6259                 back = find_tree_backref(rec, parent, root_objectid);
6260                 if (!back)
6261                         goto out;
6262                 if (back->node.found_ref) {
6263                         if (rec->refs)
6264                                 rec->refs--;
6265                         back->node.found_ref = 0;
6266                 }
6267                 if (back->node.found_extent_tree) {
6268                         if (rec->extent_item_refs)
6269                                 rec->extent_item_refs--;
6270                         back->node.found_extent_tree = 0;
6271                 }
6272                 if (!back->node.found_extent_tree && back->node.found_ref) {
6273                         list_del(&back->node.list);
6274                         free(back);
6275                 }
6276         }
6277         maybe_free_extent_rec(extent_cache, rec);
6278 out:
6279         return 0;
6280 }
6281
6282 static int delete_extent_records(struct btrfs_trans_handle *trans,
6283                                  struct btrfs_root *root,
6284                                  struct btrfs_path *path,
6285                                  u64 bytenr, u64 new_len)
6286 {
6287         struct btrfs_key key;
6288         struct btrfs_key found_key;
6289         struct extent_buffer *leaf;
6290         int ret;
6291         int slot;
6292
6293
6294         key.objectid = bytenr;
6295         key.type = (u8)-1;
6296         key.offset = (u64)-1;
6297
6298         while(1) {
6299                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6300                                         &key, path, 0, 1);
6301                 if (ret < 0)
6302                         break;
6303
6304                 if (ret > 0) {
6305                         ret = 0;
6306                         if (path->slots[0] == 0)
6307                                 break;
6308                         path->slots[0]--;
6309                 }
6310                 ret = 0;
6311
6312                 leaf = path->nodes[0];
6313                 slot = path->slots[0];
6314
6315                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6316                 if (found_key.objectid != bytenr)
6317                         break;
6318
6319                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6320                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6321                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6322                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6323                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6324                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6325                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6326                         btrfs_release_path(path);
6327                         if (found_key.type == 0) {
6328                                 if (found_key.offset == 0)
6329                                         break;
6330                                 key.offset = found_key.offset - 1;
6331                                 key.type = found_key.type;
6332                         }
6333                         key.type = found_key.type - 1;
6334                         key.offset = (u64)-1;
6335                         continue;
6336                 }
6337
6338                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6339                         found_key.objectid, found_key.type, found_key.offset);
6340
6341                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6342                 if (ret)
6343                         break;
6344                 btrfs_release_path(path);
6345
6346                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6347                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6348                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6349                                 found_key.offset : root->leafsize;
6350
6351                         ret = btrfs_update_block_group(trans, root, bytenr,
6352                                                        bytes, 0, 0);
6353                         if (ret)
6354                                 break;
6355                 }
6356         }
6357
6358         btrfs_release_path(path);
6359         return ret;
6360 }
6361
6362 /*
6363  * for a single backref, this will allocate a new extent
6364  * and add the backref to it.
6365  */
6366 static int record_extent(struct btrfs_trans_handle *trans,
6367                          struct btrfs_fs_info *info,
6368                          struct btrfs_path *path,
6369                          struct extent_record *rec,
6370                          struct extent_backref *back,
6371                          int allocated, u64 flags)
6372 {
6373         int ret;
6374         struct btrfs_root *extent_root = info->extent_root;
6375         struct extent_buffer *leaf;
6376         struct btrfs_key ins_key;
6377         struct btrfs_extent_item *ei;
6378         struct tree_backref *tback;
6379         struct data_backref *dback;
6380         struct btrfs_tree_block_info *bi;
6381
6382         if (!back->is_data)
6383                 rec->max_size = max_t(u64, rec->max_size,
6384                                     info->extent_root->leafsize);
6385
6386         if (!allocated) {
6387                 u32 item_size = sizeof(*ei);
6388
6389                 if (!back->is_data)
6390                         item_size += sizeof(*bi);
6391
6392                 ins_key.objectid = rec->start;
6393                 ins_key.offset = rec->max_size;
6394                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6395
6396                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6397                                         &ins_key, item_size);
6398                 if (ret)
6399                         goto fail;
6400
6401                 leaf = path->nodes[0];
6402                 ei = btrfs_item_ptr(leaf, path->slots[0],
6403                                     struct btrfs_extent_item);
6404
6405                 btrfs_set_extent_refs(leaf, ei, 0);
6406                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6407
6408                 if (back->is_data) {
6409                         btrfs_set_extent_flags(leaf, ei,
6410                                                BTRFS_EXTENT_FLAG_DATA);
6411                 } else {
6412                         struct btrfs_disk_key copy_key;;
6413
6414                         tback = (struct tree_backref *)back;
6415                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6416                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6417                                              sizeof(*bi));
6418
6419                         btrfs_set_disk_key_objectid(&copy_key,
6420                                                     rec->info_objectid);
6421                         btrfs_set_disk_key_type(&copy_key, 0);
6422                         btrfs_set_disk_key_offset(&copy_key, 0);
6423
6424                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6425                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6426
6427                         btrfs_set_extent_flags(leaf, ei,
6428                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6429                 }
6430
6431                 btrfs_mark_buffer_dirty(leaf);
6432                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6433                                                rec->max_size, 1, 0);
6434                 if (ret)
6435                         goto fail;
6436                 btrfs_release_path(path);
6437         }
6438
6439         if (back->is_data) {
6440                 u64 parent;
6441                 int i;
6442
6443                 dback = (struct data_backref *)back;
6444                 if (back->full_backref)
6445                         parent = dback->parent;
6446                 else
6447                         parent = 0;
6448
6449                 for (i = 0; i < dback->found_ref; i++) {
6450                         /* if parent != 0, we're doing a full backref
6451                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6452                          * just makes the backref allocator create a data
6453                          * backref
6454                          */
6455                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6456                                                    rec->start, rec->max_size,
6457                                                    parent,
6458                                                    dback->root,
6459                                                    parent ?
6460                                                    BTRFS_FIRST_FREE_OBJECTID :
6461                                                    dback->owner,
6462                                                    dback->offset);
6463                         if (ret)
6464                                 break;
6465                 }
6466                 fprintf(stderr, "adding new data backref"
6467                                 " on %llu %s %llu owner %llu"
6468                                 " offset %llu found %d\n",
6469                                 (unsigned long long)rec->start,
6470                                 back->full_backref ?
6471                                 "parent" : "root",
6472                                 back->full_backref ?
6473                                 (unsigned long long)parent :
6474                                 (unsigned long long)dback->root,
6475                                 (unsigned long long)dback->owner,
6476                                 (unsigned long long)dback->offset,
6477                                 dback->found_ref);
6478         } else {
6479                 u64 parent;
6480
6481                 tback = (struct tree_backref *)back;
6482                 if (back->full_backref)
6483                         parent = tback->parent;
6484                 else
6485                         parent = 0;
6486
6487                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6488                                            rec->start, rec->max_size,
6489                                            parent, tback->root, 0, 0);
6490                 fprintf(stderr, "adding new tree backref on "
6491                         "start %llu len %llu parent %llu root %llu\n",
6492                         rec->start, rec->max_size, parent, tback->root);
6493         }
6494 fail:
6495         btrfs_release_path(path);
6496         return ret;
6497 }
6498
6499 struct extent_entry {
6500         u64 bytenr;
6501         u64 bytes;
6502         int count;
6503         int broken;
6504         struct list_head list;
6505 };
6506
6507 static struct extent_entry *find_entry(struct list_head *entries,
6508                                        u64 bytenr, u64 bytes)
6509 {
6510         struct extent_entry *entry = NULL;
6511
6512         list_for_each_entry(entry, entries, list) {
6513                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6514                         return entry;
6515         }
6516
6517         return NULL;
6518 }
6519
6520 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6521 {
6522         struct extent_entry *entry, *best = NULL, *prev = NULL;
6523
6524         list_for_each_entry(entry, entries, list) {
6525                 if (!prev) {
6526                         prev = entry;
6527                         continue;
6528                 }
6529
6530                 /*
6531                  * If there are as many broken entries as entries then we know
6532                  * not to trust this particular entry.
6533                  */
6534                 if (entry->broken == entry->count)
6535                         continue;
6536
6537                 /*
6538                  * If our current entry == best then we can't be sure our best
6539                  * is really the best, so we need to keep searching.
6540                  */
6541                 if (best && best->count == entry->count) {
6542                         prev = entry;
6543                         best = NULL;
6544                         continue;
6545                 }
6546
6547                 /* Prev == entry, not good enough, have to keep searching */
6548                 if (!prev->broken && prev->count == entry->count)
6549                         continue;
6550
6551                 if (!best)
6552                         best = (prev->count > entry->count) ? prev : entry;
6553                 else if (best->count < entry->count)
6554                         best = entry;
6555                 prev = entry;
6556         }
6557
6558         return best;
6559 }
6560
6561 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6562                       struct data_backref *dback, struct extent_entry *entry)
6563 {
6564         struct btrfs_trans_handle *trans;
6565         struct btrfs_root *root;
6566         struct btrfs_file_extent_item *fi;
6567         struct extent_buffer *leaf;
6568         struct btrfs_key key;
6569         u64 bytenr, bytes;
6570         int ret, err;
6571
6572         key.objectid = dback->root;
6573         key.type = BTRFS_ROOT_ITEM_KEY;
6574         key.offset = (u64)-1;
6575         root = btrfs_read_fs_root(info, &key);
6576         if (IS_ERR(root)) {
6577                 fprintf(stderr, "Couldn't find root for our ref\n");
6578                 return -EINVAL;
6579         }
6580
6581         /*
6582          * The backref points to the original offset of the extent if it was
6583          * split, so we need to search down to the offset we have and then walk
6584          * forward until we find the backref we're looking for.
6585          */
6586         key.objectid = dback->owner;
6587         key.type = BTRFS_EXTENT_DATA_KEY;
6588         key.offset = dback->offset;
6589         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6590         if (ret < 0) {
6591                 fprintf(stderr, "Error looking up ref %d\n", ret);
6592                 return ret;
6593         }
6594
6595         while (1) {
6596                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6597                         ret = btrfs_next_leaf(root, path);
6598                         if (ret) {
6599                                 fprintf(stderr, "Couldn't find our ref, next\n");
6600                                 return -EINVAL;
6601                         }
6602                 }
6603                 leaf = path->nodes[0];
6604                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6605                 if (key.objectid != dback->owner ||
6606                     key.type != BTRFS_EXTENT_DATA_KEY) {
6607                         fprintf(stderr, "Couldn't find our ref, search\n");
6608                         return -EINVAL;
6609                 }
6610                 fi = btrfs_item_ptr(leaf, path->slots[0],
6611                                     struct btrfs_file_extent_item);
6612                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6613                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6614
6615                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6616                         break;
6617                 path->slots[0]++;
6618         }
6619
6620         btrfs_release_path(path);
6621
6622         trans = btrfs_start_transaction(root, 1);
6623         if (IS_ERR(trans))
6624                 return PTR_ERR(trans);
6625
6626         /*
6627          * Ok we have the key of the file extent we want to fix, now we can cow
6628          * down to the thing and fix it.
6629          */
6630         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6631         if (ret < 0) {
6632                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6633                         key.objectid, key.type, key.offset, ret);
6634                 goto out;
6635         }
6636         if (ret > 0) {
6637                 fprintf(stderr, "Well that's odd, we just found this key "
6638                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6639                         key.offset);
6640                 ret = -EINVAL;
6641                 goto out;
6642         }
6643         leaf = path->nodes[0];
6644         fi = btrfs_item_ptr(leaf, path->slots[0],
6645                             struct btrfs_file_extent_item);
6646
6647         if (btrfs_file_extent_compression(leaf, fi) &&
6648             dback->disk_bytenr != entry->bytenr) {
6649                 fprintf(stderr, "Ref doesn't match the record start and is "
6650                         "compressed, please take a btrfs-image of this file "
6651                         "system and send it to a btrfs developer so they can "
6652                         "complete this functionality for bytenr %Lu\n",
6653                         dback->disk_bytenr);
6654                 ret = -EINVAL;
6655                 goto out;
6656         }
6657
6658         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6659                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6660         } else if (dback->disk_bytenr > entry->bytenr) {
6661                 u64 off_diff, offset;
6662
6663                 off_diff = dback->disk_bytenr - entry->bytenr;
6664                 offset = btrfs_file_extent_offset(leaf, fi);
6665                 if (dback->disk_bytenr + offset +
6666                     btrfs_file_extent_num_bytes(leaf, fi) >
6667                     entry->bytenr + entry->bytes) {
6668                         fprintf(stderr, "Ref is past the entry end, please "
6669                                 "take a btrfs-image of this file system and "
6670                                 "send it to a btrfs developer, ref %Lu\n",
6671                                 dback->disk_bytenr);
6672                         ret = -EINVAL;
6673                         goto out;
6674                 }
6675                 offset += off_diff;
6676                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6677                 btrfs_set_file_extent_offset(leaf, fi, offset);
6678         } else if (dback->disk_bytenr < entry->bytenr) {
6679                 u64 offset;
6680
6681                 offset = btrfs_file_extent_offset(leaf, fi);
6682                 if (dback->disk_bytenr + offset < entry->bytenr) {
6683                         fprintf(stderr, "Ref is before the entry start, please"
6684                                 " take a btrfs-image of this file system and "
6685                                 "send it to a btrfs developer, ref %Lu\n",
6686                                 dback->disk_bytenr);
6687                         ret = -EINVAL;
6688                         goto out;
6689                 }
6690
6691                 offset += dback->disk_bytenr;
6692                 offset -= entry->bytenr;
6693                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6694                 btrfs_set_file_extent_offset(leaf, fi, offset);
6695         }
6696
6697         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6698
6699         /*
6700          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6701          * only do this if we aren't using compression, otherwise it's a
6702          * trickier case.
6703          */
6704         if (!btrfs_file_extent_compression(leaf, fi))
6705                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6706         else
6707                 printf("ram bytes may be wrong?\n");
6708         btrfs_mark_buffer_dirty(leaf);
6709 out:
6710         err = btrfs_commit_transaction(trans, root);
6711         btrfs_release_path(path);
6712         return ret ? ret : err;
6713 }
6714
6715 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6716                            struct extent_record *rec)
6717 {
6718         struct extent_backref *back;
6719         struct data_backref *dback;
6720         struct extent_entry *entry, *best = NULL;
6721         LIST_HEAD(entries);
6722         int nr_entries = 0;
6723         int broken_entries = 0;
6724         int ret = 0;
6725         short mismatch = 0;
6726
6727         /*
6728          * Metadata is easy and the backrefs should always agree on bytenr and
6729          * size, if not we've got bigger issues.
6730          */
6731         if (rec->metadata)
6732                 return 0;
6733
6734         list_for_each_entry(back, &rec->backrefs, list) {
6735                 if (back->full_backref || !back->is_data)
6736                         continue;
6737
6738                 dback = (struct data_backref *)back;
6739
6740                 /*
6741                  * We only pay attention to backrefs that we found a real
6742                  * backref for.
6743                  */
6744                 if (dback->found_ref == 0)
6745                         continue;
6746
6747                 /*
6748                  * For now we only catch when the bytes don't match, not the
6749                  * bytenr.  We can easily do this at the same time, but I want
6750                  * to have a fs image to test on before we just add repair
6751                  * functionality willy-nilly so we know we won't screw up the
6752                  * repair.
6753                  */
6754
6755                 entry = find_entry(&entries, dback->disk_bytenr,
6756                                    dback->bytes);
6757                 if (!entry) {
6758                         entry = malloc(sizeof(struct extent_entry));
6759                         if (!entry) {
6760                                 ret = -ENOMEM;
6761                                 goto out;
6762                         }
6763                         memset(entry, 0, sizeof(*entry));
6764                         entry->bytenr = dback->disk_bytenr;
6765                         entry->bytes = dback->bytes;
6766                         list_add_tail(&entry->list, &entries);
6767                         nr_entries++;
6768                 }
6769
6770                 /*
6771                  * If we only have on entry we may think the entries agree when
6772                  * in reality they don't so we have to do some extra checking.
6773                  */
6774                 if (dback->disk_bytenr != rec->start ||
6775                     dback->bytes != rec->nr || back->broken)
6776                         mismatch = 1;
6777
6778                 if (back->broken) {
6779                         entry->broken++;
6780                         broken_entries++;
6781                 }
6782
6783                 entry->count++;
6784         }
6785
6786         /* Yay all the backrefs agree, carry on good sir */
6787         if (nr_entries <= 1 && !mismatch)
6788                 goto out;
6789
6790         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6791                 "%Lu\n", rec->start);
6792
6793         /*
6794          * First we want to see if the backrefs can agree amongst themselves who
6795          * is right, so figure out which one of the entries has the highest
6796          * count.
6797          */
6798         best = find_most_right_entry(&entries);
6799
6800         /*
6801          * Ok so we may have an even split between what the backrefs think, so
6802          * this is where we use the extent ref to see what it thinks.
6803          */
6804         if (!best) {
6805                 entry = find_entry(&entries, rec->start, rec->nr);
6806                 if (!entry && (!broken_entries || !rec->found_rec)) {
6807                         fprintf(stderr, "Backrefs don't agree with each other "
6808                                 "and extent record doesn't agree with anybody,"
6809                                 " so we can't fix bytenr %Lu bytes %Lu\n",
6810                                 rec->start, rec->nr);
6811                         ret = -EINVAL;
6812                         goto out;
6813                 } else if (!entry) {
6814                         /*
6815                          * Ok our backrefs were broken, we'll assume this is the
6816                          * correct value and add an entry for this range.
6817                          */
6818                         entry = malloc(sizeof(struct extent_entry));
6819                         if (!entry) {
6820                                 ret = -ENOMEM;
6821                                 goto out;
6822                         }
6823                         memset(entry, 0, sizeof(*entry));
6824                         entry->bytenr = rec->start;
6825                         entry->bytes = rec->nr;
6826                         list_add_tail(&entry->list, &entries);
6827                         nr_entries++;
6828                 }
6829                 entry->count++;
6830                 best = find_most_right_entry(&entries);
6831                 if (!best) {
6832                         fprintf(stderr, "Backrefs and extent record evenly "
6833                                 "split on who is right, this is going to "
6834                                 "require user input to fix bytenr %Lu bytes "
6835                                 "%Lu\n", rec->start, rec->nr);
6836                         ret = -EINVAL;
6837                         goto out;
6838                 }
6839         }
6840
6841         /*
6842          * I don't think this can happen currently as we'll abort() if we catch
6843          * this case higher up, but in case somebody removes that we still can't
6844          * deal with it properly here yet, so just bail out of that's the case.
6845          */
6846         if (best->bytenr != rec->start) {
6847                 fprintf(stderr, "Extent start and backref starts don't match, "
6848                         "please use btrfs-image on this file system and send "
6849                         "it to a btrfs developer so they can make fsck fix "
6850                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
6851                         rec->start, rec->nr);
6852                 ret = -EINVAL;
6853                 goto out;
6854         }
6855
6856         /*
6857          * Ok great we all agreed on an extent record, let's go find the real
6858          * references and fix up the ones that don't match.
6859          */
6860         list_for_each_entry(back, &rec->backrefs, list) {
6861                 if (back->full_backref || !back->is_data)
6862                         continue;
6863
6864                 dback = (struct data_backref *)back;
6865
6866                 /*
6867                  * Still ignoring backrefs that don't have a real ref attached
6868                  * to them.
6869                  */
6870                 if (dback->found_ref == 0)
6871                         continue;
6872
6873                 if (dback->bytes == best->bytes &&
6874                     dback->disk_bytenr == best->bytenr)
6875                         continue;
6876
6877                 ret = repair_ref(info, path, dback, best);
6878                 if (ret)
6879                         goto out;
6880         }
6881
6882         /*
6883          * Ok we messed with the actual refs, which means we need to drop our
6884          * entire cache and go back and rescan.  I know this is a huge pain and
6885          * adds a lot of extra work, but it's the only way to be safe.  Once all
6886          * the backrefs agree we may not need to do anything to the extent
6887          * record itself.
6888          */
6889         ret = -EAGAIN;
6890 out:
6891         while (!list_empty(&entries)) {
6892                 entry = list_entry(entries.next, struct extent_entry, list);
6893                 list_del_init(&entry->list);
6894                 free(entry);
6895         }
6896         return ret;
6897 }
6898
6899 static int process_duplicates(struct btrfs_root *root,
6900                               struct cache_tree *extent_cache,
6901                               struct extent_record *rec)
6902 {
6903         struct extent_record *good, *tmp;
6904         struct cache_extent *cache;
6905         int ret;
6906
6907         /*
6908          * If we found a extent record for this extent then return, or if we
6909          * have more than one duplicate we are likely going to need to delete
6910          * something.
6911          */
6912         if (rec->found_rec || rec->num_duplicates > 1)
6913                 return 0;
6914
6915         /* Shouldn't happen but just in case */
6916         BUG_ON(!rec->num_duplicates);
6917
6918         /*
6919          * So this happens if we end up with a backref that doesn't match the
6920          * actual extent entry.  So either the backref is bad or the extent
6921          * entry is bad.  Either way we want to have the extent_record actually
6922          * reflect what we found in the extent_tree, so we need to take the
6923          * duplicate out and use that as the extent_record since the only way we
6924          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6925          */
6926         remove_cache_extent(extent_cache, &rec->cache);
6927
6928         good = list_entry(rec->dups.next, struct extent_record, list);
6929         list_del_init(&good->list);
6930         INIT_LIST_HEAD(&good->backrefs);
6931         INIT_LIST_HEAD(&good->dups);
6932         good->cache.start = good->start;
6933         good->cache.size = good->nr;
6934         good->content_checked = 0;
6935         good->owner_ref_checked = 0;
6936         good->num_duplicates = 0;
6937         good->refs = rec->refs;
6938         list_splice_init(&rec->backrefs, &good->backrefs);
6939         while (1) {
6940                 cache = lookup_cache_extent(extent_cache, good->start,
6941                                             good->nr);
6942                 if (!cache)
6943                         break;
6944                 tmp = container_of(cache, struct extent_record, cache);
6945
6946                 /*
6947                  * If we find another overlapping extent and it's found_rec is
6948                  * set then it's a duplicate and we need to try and delete
6949                  * something.
6950                  */
6951                 if (tmp->found_rec || tmp->num_duplicates > 0) {
6952                         if (list_empty(&good->list))
6953                                 list_add_tail(&good->list,
6954                                               &duplicate_extents);
6955                         good->num_duplicates += tmp->num_duplicates + 1;
6956                         list_splice_init(&tmp->dups, &good->dups);
6957                         list_del_init(&tmp->list);
6958                         list_add_tail(&tmp->list, &good->dups);
6959                         remove_cache_extent(extent_cache, &tmp->cache);
6960                         continue;
6961                 }
6962
6963                 /*
6964                  * Ok we have another non extent item backed extent rec, so lets
6965                  * just add it to this extent and carry on like we did above.
6966                  */
6967                 good->refs += tmp->refs;
6968                 list_splice_init(&tmp->backrefs, &good->backrefs);
6969                 remove_cache_extent(extent_cache, &tmp->cache);
6970                 free(tmp);
6971         }
6972         ret = insert_cache_extent(extent_cache, &good->cache);
6973         BUG_ON(ret);
6974         free(rec);
6975         return good->num_duplicates ? 0 : 1;
6976 }
6977
6978 static int delete_duplicate_records(struct btrfs_root *root,
6979                                     struct extent_record *rec)
6980 {
6981         struct btrfs_trans_handle *trans;
6982         LIST_HEAD(delete_list);
6983         struct btrfs_path *path;
6984         struct extent_record *tmp, *good, *n;
6985         int nr_del = 0;
6986         int ret = 0, err;
6987         struct btrfs_key key;
6988
6989         path = btrfs_alloc_path();
6990         if (!path) {
6991                 ret = -ENOMEM;
6992                 goto out;
6993         }
6994
6995         good = rec;
6996         /* Find the record that covers all of the duplicates. */
6997         list_for_each_entry(tmp, &rec->dups, list) {
6998                 if (good->start < tmp->start)
6999                         continue;
7000                 if (good->nr > tmp->nr)
7001                         continue;
7002
7003                 if (tmp->start + tmp->nr < good->start + good->nr) {
7004                         fprintf(stderr, "Ok we have overlapping extents that "
7005                                 "aren't completely covered by eachother, this "
7006                                 "is going to require more careful thought.  "
7007                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7008                                 tmp->start, tmp->nr, good->start, good->nr);
7009                         abort();
7010                 }
7011                 good = tmp;
7012         }
7013
7014         if (good != rec)
7015                 list_add_tail(&rec->list, &delete_list);
7016
7017         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7018                 if (tmp == good)
7019                         continue;
7020                 list_move_tail(&tmp->list, &delete_list);
7021         }
7022
7023         root = root->fs_info->extent_root;
7024         trans = btrfs_start_transaction(root, 1);
7025         if (IS_ERR(trans)) {
7026                 ret = PTR_ERR(trans);
7027                 goto out;
7028         }
7029
7030         list_for_each_entry(tmp, &delete_list, list) {
7031                 if (tmp->found_rec == 0)
7032                         continue;
7033                 key.objectid = tmp->start;
7034                 key.type = BTRFS_EXTENT_ITEM_KEY;
7035                 key.offset = tmp->nr;
7036
7037                 /* Shouldn't happen but just in case */
7038                 if (tmp->metadata) {
7039                         fprintf(stderr, "Well this shouldn't happen, extent "
7040                                 "record overlaps but is metadata? "
7041                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7042                         abort();
7043                 }
7044
7045                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7046                 if (ret) {
7047                         if (ret > 0)
7048                                 ret = -EINVAL;
7049                         break;
7050                 }
7051                 ret = btrfs_del_item(trans, root, path);
7052                 if (ret)
7053                         break;
7054                 btrfs_release_path(path);
7055                 nr_del++;
7056         }
7057         err = btrfs_commit_transaction(trans, root);
7058         if (err && !ret)
7059                 ret = err;
7060 out:
7061         while (!list_empty(&delete_list)) {
7062                 tmp = list_entry(delete_list.next, struct extent_record, list);
7063                 list_del_init(&tmp->list);
7064                 if (tmp == rec)
7065                         continue;
7066                 free(tmp);
7067         }
7068
7069         while (!list_empty(&rec->dups)) {
7070                 tmp = list_entry(rec->dups.next, struct extent_record, list);
7071                 list_del_init(&tmp->list);
7072                 free(tmp);
7073         }
7074
7075         btrfs_free_path(path);
7076
7077         if (!ret && !nr_del)
7078                 rec->num_duplicates = 0;
7079
7080         return ret ? ret : nr_del;
7081 }
7082
7083 static int find_possible_backrefs(struct btrfs_fs_info *info,
7084                                   struct btrfs_path *path,
7085                                   struct cache_tree *extent_cache,
7086                                   struct extent_record *rec)
7087 {
7088         struct btrfs_root *root;
7089         struct extent_backref *back;
7090         struct data_backref *dback;
7091         struct cache_extent *cache;
7092         struct btrfs_file_extent_item *fi;
7093         struct btrfs_key key;
7094         u64 bytenr, bytes;
7095         int ret;
7096
7097         list_for_each_entry(back, &rec->backrefs, list) {
7098                 /* Don't care about full backrefs (poor unloved backrefs) */
7099                 if (back->full_backref || !back->is_data)
7100                         continue;
7101
7102                 dback = (struct data_backref *)back;
7103
7104                 /* We found this one, we don't need to do a lookup */
7105                 if (dback->found_ref)
7106                         continue;
7107
7108                 key.objectid = dback->root;
7109                 key.type = BTRFS_ROOT_ITEM_KEY;
7110                 key.offset = (u64)-1;
7111
7112                 root = btrfs_read_fs_root(info, &key);
7113
7114                 /* No root, definitely a bad ref, skip */
7115                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7116                         continue;
7117                 /* Other err, exit */
7118                 if (IS_ERR(root))
7119                         return PTR_ERR(root);
7120
7121                 key.objectid = dback->owner;
7122                 key.type = BTRFS_EXTENT_DATA_KEY;
7123                 key.offset = dback->offset;
7124                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7125                 if (ret) {
7126                         btrfs_release_path(path);
7127                         if (ret < 0)
7128                                 return ret;
7129                         /* Didn't find it, we can carry on */
7130                         ret = 0;
7131                         continue;
7132                 }
7133
7134                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7135                                     struct btrfs_file_extent_item);
7136                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7137                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7138                 btrfs_release_path(path);
7139                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7140                 if (cache) {
7141                         struct extent_record *tmp;
7142                         tmp = container_of(cache, struct extent_record, cache);
7143
7144                         /*
7145                          * If we found an extent record for the bytenr for this
7146                          * particular backref then we can't add it to our
7147                          * current extent record.  We only want to add backrefs
7148                          * that don't have a corresponding extent item in the
7149                          * extent tree since they likely belong to this record
7150                          * and we need to fix it if it doesn't match bytenrs.
7151                          */
7152                         if  (tmp->found_rec)
7153                                 continue;
7154                 }
7155
7156                 dback->found_ref += 1;
7157                 dback->disk_bytenr = bytenr;
7158                 dback->bytes = bytes;
7159
7160                 /*
7161                  * Set this so the verify backref code knows not to trust the
7162                  * values in this backref.
7163                  */
7164                 back->broken = 1;
7165         }
7166
7167         return 0;
7168 }
7169
7170 /*
7171  * Record orphan data ref into corresponding root.
7172  *
7173  * Return 0 if the extent item contains data ref and recorded.
7174  * Return 1 if the extent item contains no useful data ref
7175  *   On that case, it may contains only shared_dataref or metadata backref
7176  *   or the file extent exists(this should be handled by the extent bytenr
7177  *   recovery routine)
7178  * Return <0 if something goes wrong.
7179  */
7180 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7181                                       struct extent_record *rec)
7182 {
7183         struct btrfs_key key;
7184         struct btrfs_root *dest_root;
7185         struct extent_backref *back;
7186         struct data_backref *dback;
7187         struct orphan_data_extent *orphan;
7188         struct btrfs_path *path;
7189         int recorded_data_ref = 0;
7190         int ret = 0;
7191
7192         if (rec->metadata)
7193                 return 1;
7194         path = btrfs_alloc_path();
7195         if (!path)
7196                 return -ENOMEM;
7197         list_for_each_entry(back, &rec->backrefs, list) {
7198                 if (back->full_backref || !back->is_data ||
7199                     !back->found_extent_tree)
7200                         continue;
7201                 dback = (struct data_backref *)back;
7202                 if (dback->found_ref)
7203                         continue;
7204                 key.objectid = dback->root;
7205                 key.type = BTRFS_ROOT_ITEM_KEY;
7206                 key.offset = (u64)-1;
7207
7208                 dest_root = btrfs_read_fs_root(fs_info, &key);
7209
7210                 /* For non-exist root we just skip it */
7211                 if (IS_ERR(dest_root) || !dest_root)
7212                         continue;
7213
7214                 key.objectid = dback->owner;
7215                 key.type = BTRFS_EXTENT_DATA_KEY;
7216                 key.offset = dback->offset;
7217
7218                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7219                 /*
7220                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7221                  * we need to record it for inode/file extent rebuild.
7222                  * For ret > 0, we record it only for file extent rebuild.
7223                  * For ret == 0, the file extent exists but only bytenr
7224                  * mismatch, let the original bytenr fix routine to handle,
7225                  * don't record it.
7226                  */
7227                 if (ret == 0)
7228                         continue;
7229                 ret = 0;
7230                 orphan = malloc(sizeof(*orphan));
7231                 if (!orphan) {
7232                         ret = -ENOMEM;
7233                         goto out;
7234                 }
7235                 INIT_LIST_HEAD(&orphan->list);
7236                 orphan->root = dback->root;
7237                 orphan->objectid = dback->owner;
7238                 orphan->offset = dback->offset;
7239                 orphan->disk_bytenr = rec->cache.start;
7240                 orphan->disk_len = rec->cache.size;
7241                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7242                 recorded_data_ref = 1;
7243         }
7244 out:
7245         btrfs_free_path(path);
7246         if (!ret)
7247                 return !recorded_data_ref;
7248         else
7249                 return ret;
7250 }
7251
7252 /*
7253  * when an incorrect extent item is found, this will delete
7254  * all of the existing entries for it and recreate them
7255  * based on what the tree scan found.
7256  */
7257 static int fixup_extent_refs(struct btrfs_fs_info *info,
7258                              struct cache_tree *extent_cache,
7259                              struct extent_record *rec)
7260 {
7261         struct btrfs_trans_handle *trans = NULL;
7262         int ret;
7263         struct btrfs_path *path;
7264         struct list_head *cur = rec->backrefs.next;
7265         struct cache_extent *cache;
7266         struct extent_backref *back;
7267         int allocated = 0;
7268         u64 flags = 0;
7269
7270         if (rec->flag_block_full_backref)
7271                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7272
7273         path = btrfs_alloc_path();
7274         if (!path)
7275                 return -ENOMEM;
7276
7277         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7278                 /*
7279                  * Sometimes the backrefs themselves are so broken they don't
7280                  * get attached to any meaningful rec, so first go back and
7281                  * check any of our backrefs that we couldn't find and throw
7282                  * them into the list if we find the backref so that
7283                  * verify_backrefs can figure out what to do.
7284                  */
7285                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7286                 if (ret < 0)
7287                         goto out;
7288         }
7289
7290         /* step one, make sure all of the backrefs agree */
7291         ret = verify_backrefs(info, path, rec);
7292         if (ret < 0)
7293                 goto out;
7294
7295         trans = btrfs_start_transaction(info->extent_root, 1);
7296         if (IS_ERR(trans)) {
7297                 ret = PTR_ERR(trans);
7298                 goto out;
7299         }
7300
7301         /* step two, delete all the existing records */
7302         ret = delete_extent_records(trans, info->extent_root, path,
7303                                     rec->start, rec->max_size);
7304
7305         if (ret < 0)
7306                 goto out;
7307
7308         /* was this block corrupt?  If so, don't add references to it */
7309         cache = lookup_cache_extent(info->corrupt_blocks,
7310                                     rec->start, rec->max_size);
7311         if (cache) {
7312                 ret = 0;
7313                 goto out;
7314         }
7315
7316         /* step three, recreate all the refs we did find */
7317         while(cur != &rec->backrefs) {
7318                 back = list_entry(cur, struct extent_backref, list);
7319                 cur = cur->next;
7320
7321                 /*
7322                  * if we didn't find any references, don't create a
7323                  * new extent record
7324                  */
7325                 if (!back->found_ref)
7326                         continue;
7327
7328                 rec->bad_full_backref = 0;
7329                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7330                 allocated = 1;
7331
7332                 if (ret)
7333                         goto out;
7334         }
7335 out:
7336         if (trans) {
7337                 int err = btrfs_commit_transaction(trans, info->extent_root);
7338                 if (!ret)
7339                         ret = err;
7340         }
7341
7342         btrfs_free_path(path);
7343         return ret;
7344 }
7345
7346 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7347                               struct extent_record *rec)
7348 {
7349         struct btrfs_trans_handle *trans;
7350         struct btrfs_root *root = fs_info->extent_root;
7351         struct btrfs_path *path;
7352         struct btrfs_extent_item *ei;
7353         struct btrfs_key key;
7354         u64 flags;
7355         int ret = 0;
7356
7357         key.objectid = rec->start;
7358         if (rec->metadata) {
7359                 key.type = BTRFS_METADATA_ITEM_KEY;
7360                 key.offset = rec->info_level;
7361         } else {
7362                 key.type = BTRFS_EXTENT_ITEM_KEY;
7363                 key.offset = rec->max_size;
7364         }
7365
7366         path = btrfs_alloc_path();
7367         if (!path)
7368                 return -ENOMEM;
7369
7370         trans = btrfs_start_transaction(root, 0);
7371         if (IS_ERR(trans)) {
7372                 btrfs_free_path(path);
7373                 return PTR_ERR(trans);
7374         }
7375
7376         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7377         if (ret < 0) {
7378                 btrfs_free_path(path);
7379                 btrfs_commit_transaction(trans, root);
7380                 return ret;
7381         } else if (ret) {
7382                 fprintf(stderr, "Didn't find extent for %llu\n",
7383                         (unsigned long long)rec->start);
7384                 btrfs_free_path(path);
7385                 btrfs_commit_transaction(trans, root);
7386                 return -ENOENT;
7387         }
7388
7389         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7390                             struct btrfs_extent_item);
7391         flags = btrfs_extent_flags(path->nodes[0], ei);
7392         if (rec->flag_block_full_backref) {
7393                 fprintf(stderr, "setting full backref on %llu\n",
7394                         (unsigned long long)key.objectid);
7395                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7396         } else {
7397                 fprintf(stderr, "clearing full backref on %llu\n",
7398                         (unsigned long long)key.objectid);
7399                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7400         }
7401         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7402         btrfs_mark_buffer_dirty(path->nodes[0]);
7403         btrfs_free_path(path);
7404         return btrfs_commit_transaction(trans, root);
7405 }
7406
7407 /* right now we only prune from the extent allocation tree */
7408 static int prune_one_block(struct btrfs_trans_handle *trans,
7409                            struct btrfs_fs_info *info,
7410                            struct btrfs_corrupt_block *corrupt)
7411 {
7412         int ret;
7413         struct btrfs_path path;
7414         struct extent_buffer *eb;
7415         u64 found;
7416         int slot;
7417         int nritems;
7418         int level = corrupt->level + 1;
7419
7420         btrfs_init_path(&path);
7421 again:
7422         /* we want to stop at the parent to our busted block */
7423         path.lowest_level = level;
7424
7425         ret = btrfs_search_slot(trans, info->extent_root,
7426                                 &corrupt->key, &path, -1, 1);
7427
7428         if (ret < 0)
7429                 goto out;
7430
7431         eb = path.nodes[level];
7432         if (!eb) {
7433                 ret = -ENOENT;
7434                 goto out;
7435         }
7436
7437         /*
7438          * hopefully the search gave us the block we want to prune,
7439          * lets try that first
7440          */
7441         slot = path.slots[level];
7442         found =  btrfs_node_blockptr(eb, slot);
7443         if (found == corrupt->cache.start)
7444                 goto del_ptr;
7445
7446         nritems = btrfs_header_nritems(eb);
7447
7448         /* the search failed, lets scan this node and hope we find it */
7449         for (slot = 0; slot < nritems; slot++) {
7450                 found =  btrfs_node_blockptr(eb, slot);
7451                 if (found == corrupt->cache.start)
7452                         goto del_ptr;
7453         }
7454         /*
7455          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7456          * to this block
7457          */
7458         if (eb == info->extent_root->node) {
7459                 ret = -ENOENT;
7460                 goto out;
7461         } else {
7462                 level++;
7463                 btrfs_release_path(&path);
7464                 goto again;
7465         }
7466
7467 del_ptr:
7468         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7469         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7470
7471 out:
7472         btrfs_release_path(&path);
7473         return ret;
7474 }
7475
7476 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7477 {
7478         struct btrfs_trans_handle *trans = NULL;
7479         struct cache_extent *cache;
7480         struct btrfs_corrupt_block *corrupt;
7481
7482         while (1) {
7483                 cache = search_cache_extent(info->corrupt_blocks, 0);
7484                 if (!cache)
7485                         break;
7486                 if (!trans) {
7487                         trans = btrfs_start_transaction(info->extent_root, 1);
7488                         if (IS_ERR(trans))
7489                                 return PTR_ERR(trans);
7490                 }
7491                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7492                 prune_one_block(trans, info, corrupt);
7493                 remove_cache_extent(info->corrupt_blocks, cache);
7494         }
7495         if (trans)
7496                 return btrfs_commit_transaction(trans, info->extent_root);
7497         return 0;
7498 }
7499
7500 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7501 {
7502         struct btrfs_block_group_cache *cache;
7503         u64 start, end;
7504         int ret;
7505
7506         while (1) {
7507                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7508                                             &start, &end, EXTENT_DIRTY);
7509                 if (ret)
7510                         break;
7511                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7512                                    GFP_NOFS);
7513         }
7514
7515         start = 0;
7516         while (1) {
7517                 cache = btrfs_lookup_first_block_group(fs_info, start);
7518                 if (!cache)
7519                         break;
7520                 if (cache->cached)
7521                         cache->cached = 0;
7522                 start = cache->key.objectid + cache->key.offset;
7523         }
7524 }
7525
7526 static int check_extent_refs(struct btrfs_root *root,
7527                              struct cache_tree *extent_cache)
7528 {
7529         struct extent_record *rec;
7530         struct cache_extent *cache;
7531         int err = 0;
7532         int ret = 0;
7533         int fixed = 0;
7534         int had_dups = 0;
7535         int recorded = 0;
7536
7537         if (repair) {
7538                 /*
7539                  * if we're doing a repair, we have to make sure
7540                  * we don't allocate from the problem extents.
7541                  * In the worst case, this will be all the
7542                  * extents in the FS
7543                  */
7544                 cache = search_cache_extent(extent_cache, 0);
7545                 while(cache) {
7546                         rec = container_of(cache, struct extent_record, cache);
7547                         set_extent_dirty(root->fs_info->excluded_extents,
7548                                          rec->start,
7549                                          rec->start + rec->max_size - 1,
7550                                          GFP_NOFS);
7551                         cache = next_cache_extent(cache);
7552                 }
7553
7554                 /* pin down all the corrupted blocks too */
7555                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7556                 while(cache) {
7557                         set_extent_dirty(root->fs_info->excluded_extents,
7558                                          cache->start,
7559                                          cache->start + cache->size - 1,
7560                                          GFP_NOFS);
7561                         cache = next_cache_extent(cache);
7562                 }
7563                 prune_corrupt_blocks(root->fs_info);
7564                 reset_cached_block_groups(root->fs_info);
7565         }
7566
7567         reset_cached_block_groups(root->fs_info);
7568
7569         /*
7570          * We need to delete any duplicate entries we find first otherwise we
7571          * could mess up the extent tree when we have backrefs that actually
7572          * belong to a different extent item and not the weird duplicate one.
7573          */
7574         while (repair && !list_empty(&duplicate_extents)) {
7575                 rec = list_entry(duplicate_extents.next, struct extent_record,
7576                                  list);
7577                 list_del_init(&rec->list);
7578
7579                 /* Sometimes we can find a backref before we find an actual
7580                  * extent, so we need to process it a little bit to see if there
7581                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7582                  * if this is a backref screwup.  If we need to delete stuff
7583                  * process_duplicates() will return 0, otherwise it will return
7584                  * 1 and we
7585                  */
7586                 if (process_duplicates(root, extent_cache, rec))
7587                         continue;
7588                 ret = delete_duplicate_records(root, rec);
7589                 if (ret < 0)
7590                         return ret;
7591                 /*
7592                  * delete_duplicate_records will return the number of entries
7593                  * deleted, so if it's greater than 0 then we know we actually
7594                  * did something and we need to remove.
7595                  */
7596                 if (ret)
7597                         had_dups = 1;
7598         }
7599
7600         if (had_dups)
7601                 return -EAGAIN;
7602
7603         while(1) {
7604                 int cur_err = 0;
7605
7606                 fixed = 0;
7607                 recorded = 0;
7608                 cache = search_cache_extent(extent_cache, 0);
7609                 if (!cache)
7610                         break;
7611                 rec = container_of(cache, struct extent_record, cache);
7612                 if (rec->num_duplicates) {
7613                         fprintf(stderr, "extent item %llu has multiple extent "
7614                                 "items\n", (unsigned long long)rec->start);
7615                         err = 1;
7616                         cur_err = 1;
7617                 }
7618
7619                 if (rec->refs != rec->extent_item_refs) {
7620                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7621                                 (unsigned long long)rec->start,
7622                                 (unsigned long long)rec->nr);
7623                         fprintf(stderr, "extent item %llu, found %llu\n",
7624                                 (unsigned long long)rec->extent_item_refs,
7625                                 (unsigned long long)rec->refs);
7626                         ret = record_orphan_data_extents(root->fs_info, rec);
7627                         if (ret < 0)
7628                                 goto repair_abort;
7629                         if (ret == 0) {
7630                                 recorded = 1;
7631                         } else {
7632                                 /*
7633                                  * we can't use the extent to repair file
7634                                  * extent, let the fallback method handle it.
7635                                  */
7636                                 if (!fixed && repair) {
7637                                         ret = fixup_extent_refs(
7638                                                         root->fs_info,
7639                                                         extent_cache, rec);
7640                                         if (ret)
7641                                                 goto repair_abort;
7642                                         fixed = 1;
7643                                 }
7644                         }
7645                         err = 1;
7646                         cur_err = 1;
7647                 }
7648                 if (all_backpointers_checked(rec, 1)) {
7649                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7650                                 (unsigned long long)rec->start,
7651                                 (unsigned long long)rec->nr);
7652
7653                         if (!fixed && !recorded && repair) {
7654                                 ret = fixup_extent_refs(root->fs_info,
7655                                                         extent_cache, rec);
7656                                 if (ret)
7657                                         goto repair_abort;
7658                                 fixed = 1;
7659                         }
7660                         cur_err = 1;
7661                         err = 1;
7662                 }
7663                 if (!rec->owner_ref_checked) {
7664                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7665                                 (unsigned long long)rec->start,
7666                                 (unsigned long long)rec->nr);
7667                         if (!fixed && !recorded && repair) {
7668                                 ret = fixup_extent_refs(root->fs_info,
7669                                                         extent_cache, rec);
7670                                 if (ret)
7671                                         goto repair_abort;
7672                                 fixed = 1;
7673                         }
7674                         err = 1;
7675                         cur_err = 1;
7676                 }
7677                 if (rec->bad_full_backref) {
7678                         fprintf(stderr, "bad full backref, on [%llu]\n",
7679                                 (unsigned long long)rec->start);
7680                         if (repair) {
7681                                 ret = fixup_extent_flags(root->fs_info, rec);
7682                                 if (ret)
7683                                         goto repair_abort;
7684                                 fixed = 1;
7685                         }
7686                         err = 1;
7687                         cur_err = 1;
7688                 }
7689                 /*
7690                  * Although it's not a extent ref's problem, we reuse this
7691                  * routine for error reporting.
7692                  * No repair function yet.
7693                  */
7694                 if (rec->crossing_stripes) {
7695                         fprintf(stderr,
7696                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7697                                 rec->start, rec->start + rec->max_size);
7698                         err = 1;
7699                         cur_err = 1;
7700                 }
7701
7702                 if (rec->wrong_chunk_type) {
7703                         fprintf(stderr,
7704                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7705                                 rec->start, rec->start + rec->max_size);
7706                         err = 1;
7707                         cur_err = 1;
7708                 }
7709
7710                 remove_cache_extent(extent_cache, cache);
7711                 free_all_extent_backrefs(rec);
7712                 if (!init_extent_tree && repair && (!cur_err || fixed))
7713                         clear_extent_dirty(root->fs_info->excluded_extents,
7714                                            rec->start,
7715                                            rec->start + rec->max_size - 1,
7716                                            GFP_NOFS);
7717                 free(rec);
7718         }
7719 repair_abort:
7720         if (repair) {
7721                 if (ret && ret != -EAGAIN) {
7722                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7723                         exit(1);
7724                 } else if (!ret) {
7725                         struct btrfs_trans_handle *trans;
7726
7727                         root = root->fs_info->extent_root;
7728                         trans = btrfs_start_transaction(root, 1);
7729                         if (IS_ERR(trans)) {
7730                                 ret = PTR_ERR(trans);
7731                                 goto repair_abort;
7732                         }
7733
7734                         btrfs_fix_block_accounting(trans, root);
7735                         ret = btrfs_commit_transaction(trans, root);
7736                         if (ret)
7737                                 goto repair_abort;
7738                 }
7739                 if (err)
7740                         fprintf(stderr, "repaired damaged extent references\n");
7741                 return ret;
7742         }
7743         return err;
7744 }
7745
7746 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7747 {
7748         u64 stripe_size;
7749
7750         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7751                 stripe_size = length;
7752                 stripe_size /= num_stripes;
7753         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7754                 stripe_size = length * 2;
7755                 stripe_size /= num_stripes;
7756         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7757                 stripe_size = length;
7758                 stripe_size /= (num_stripes - 1);
7759         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7760                 stripe_size = length;
7761                 stripe_size /= (num_stripes - 2);
7762         } else {
7763                 stripe_size = length;
7764         }
7765         return stripe_size;
7766 }
7767
7768 /*
7769  * Check the chunk with its block group/dev list ref:
7770  * Return 0 if all refs seems valid.
7771  * Return 1 if part of refs seems valid, need later check for rebuild ref
7772  * like missing block group and needs to search extent tree to rebuild them.
7773  * Return -1 if essential refs are missing and unable to rebuild.
7774  */
7775 static int check_chunk_refs(struct chunk_record *chunk_rec,
7776                             struct block_group_tree *block_group_cache,
7777                             struct device_extent_tree *dev_extent_cache,
7778                             int silent)
7779 {
7780         struct cache_extent *block_group_item;
7781         struct block_group_record *block_group_rec;
7782         struct cache_extent *dev_extent_item;
7783         struct device_extent_record *dev_extent_rec;
7784         u64 devid;
7785         u64 offset;
7786         u64 length;
7787         int metadump_v2 = 0;
7788         int i;
7789         int ret = 0;
7790
7791         block_group_item = lookup_cache_extent(&block_group_cache->tree,
7792                                                chunk_rec->offset,
7793                                                chunk_rec->length);
7794         if (block_group_item) {
7795                 block_group_rec = container_of(block_group_item,
7796                                                struct block_group_record,
7797                                                cache);
7798                 if (chunk_rec->length != block_group_rec->offset ||
7799                     chunk_rec->offset != block_group_rec->objectid ||
7800                     (!metadump_v2 &&
7801                      chunk_rec->type_flags != block_group_rec->flags)) {
7802                         if (!silent)
7803                                 fprintf(stderr,
7804                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7805                                         chunk_rec->objectid,
7806                                         chunk_rec->type,
7807                                         chunk_rec->offset,
7808                                         chunk_rec->length,
7809                                         chunk_rec->offset,
7810                                         chunk_rec->type_flags,
7811                                         block_group_rec->objectid,
7812                                         block_group_rec->type,
7813                                         block_group_rec->offset,
7814                                         block_group_rec->offset,
7815                                         block_group_rec->objectid,
7816                                         block_group_rec->flags);
7817                         ret = -1;
7818                 } else {
7819                         list_del_init(&block_group_rec->list);
7820                         chunk_rec->bg_rec = block_group_rec;
7821                 }
7822         } else {
7823                 if (!silent)
7824                         fprintf(stderr,
7825                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7826                                 chunk_rec->objectid,
7827                                 chunk_rec->type,
7828                                 chunk_rec->offset,
7829                                 chunk_rec->length,
7830                                 chunk_rec->offset,
7831                                 chunk_rec->type_flags);
7832                 ret = 1;
7833         }
7834
7835         if (metadump_v2)
7836                 return ret;
7837
7838         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7839                                     chunk_rec->num_stripes);
7840         for (i = 0; i < chunk_rec->num_stripes; ++i) {
7841                 devid = chunk_rec->stripes[i].devid;
7842                 offset = chunk_rec->stripes[i].offset;
7843                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7844                                                        devid, offset, length);
7845                 if (dev_extent_item) {
7846                         dev_extent_rec = container_of(dev_extent_item,
7847                                                 struct device_extent_record,
7848                                                 cache);
7849                         if (dev_extent_rec->objectid != devid ||
7850                             dev_extent_rec->offset != offset ||
7851                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
7852                             dev_extent_rec->length != length) {
7853                                 if (!silent)
7854                                         fprintf(stderr,
7855                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7856                                                 chunk_rec->objectid,
7857                                                 chunk_rec->type,
7858                                                 chunk_rec->offset,
7859                                                 chunk_rec->stripes[i].devid,
7860                                                 chunk_rec->stripes[i].offset,
7861                                                 dev_extent_rec->objectid,
7862                                                 dev_extent_rec->offset,
7863                                                 dev_extent_rec->length);
7864                                 ret = -1;
7865                         } else {
7866                                 list_move(&dev_extent_rec->chunk_list,
7867                                           &chunk_rec->dextents);
7868                         }
7869                 } else {
7870                         if (!silent)
7871                                 fprintf(stderr,
7872                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7873                                         chunk_rec->objectid,
7874                                         chunk_rec->type,
7875                                         chunk_rec->offset,
7876                                         chunk_rec->stripes[i].devid,
7877                                         chunk_rec->stripes[i].offset);
7878                         ret = -1;
7879                 }
7880         }
7881         return ret;
7882 }
7883
7884 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7885 int check_chunks(struct cache_tree *chunk_cache,
7886                  struct block_group_tree *block_group_cache,
7887                  struct device_extent_tree *dev_extent_cache,
7888                  struct list_head *good, struct list_head *bad,
7889                  struct list_head *rebuild, int silent)
7890 {
7891         struct cache_extent *chunk_item;
7892         struct chunk_record *chunk_rec;
7893         struct block_group_record *bg_rec;
7894         struct device_extent_record *dext_rec;
7895         int err;
7896         int ret = 0;
7897
7898         chunk_item = first_cache_extent(chunk_cache);
7899         while (chunk_item) {
7900                 chunk_rec = container_of(chunk_item, struct chunk_record,
7901                                          cache);
7902                 err = check_chunk_refs(chunk_rec, block_group_cache,
7903                                        dev_extent_cache, silent);
7904                 if (err < 0)
7905                         ret = err;
7906                 if (err == 0 && good)
7907                         list_add_tail(&chunk_rec->list, good);
7908                 if (err > 0 && rebuild)
7909                         list_add_tail(&chunk_rec->list, rebuild);
7910                 if (err < 0 && bad)
7911                         list_add_tail(&chunk_rec->list, bad);
7912                 chunk_item = next_cache_extent(chunk_item);
7913         }
7914
7915         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7916                 if (!silent)
7917                         fprintf(stderr,
7918                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7919                                 bg_rec->objectid,
7920                                 bg_rec->offset,
7921                                 bg_rec->flags);
7922                 if (!ret)
7923                         ret = 1;
7924         }
7925
7926         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7927                             chunk_list) {
7928                 if (!silent)
7929                         fprintf(stderr,
7930                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7931                                 dext_rec->objectid,
7932                                 dext_rec->offset,
7933                                 dext_rec->length);
7934                 if (!ret)
7935                         ret = 1;
7936         }
7937         return ret;
7938 }
7939
7940
7941 static int check_device_used(struct device_record *dev_rec,
7942                              struct device_extent_tree *dext_cache)
7943 {
7944         struct cache_extent *cache;
7945         struct device_extent_record *dev_extent_rec;
7946         u64 total_byte = 0;
7947
7948         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7949         while (cache) {
7950                 dev_extent_rec = container_of(cache,
7951                                               struct device_extent_record,
7952                                               cache);
7953                 if (dev_extent_rec->objectid != dev_rec->devid)
7954                         break;
7955
7956                 list_del_init(&dev_extent_rec->device_list);
7957                 total_byte += dev_extent_rec->length;
7958                 cache = next_cache_extent(cache);
7959         }
7960
7961         if (total_byte != dev_rec->byte_used) {
7962                 fprintf(stderr,
7963                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7964                         total_byte, dev_rec->byte_used, dev_rec->objectid,
7965                         dev_rec->type, dev_rec->offset);
7966                 return -1;
7967         } else {
7968                 return 0;
7969         }
7970 }
7971
7972 /* check btrfs_dev_item -> btrfs_dev_extent */
7973 static int check_devices(struct rb_root *dev_cache,
7974                          struct device_extent_tree *dev_extent_cache)
7975 {
7976         struct rb_node *dev_node;
7977         struct device_record *dev_rec;
7978         struct device_extent_record *dext_rec;
7979         int err;
7980         int ret = 0;
7981
7982         dev_node = rb_first(dev_cache);
7983         while (dev_node) {
7984                 dev_rec = container_of(dev_node, struct device_record, node);
7985                 err = check_device_used(dev_rec, dev_extent_cache);
7986                 if (err)
7987                         ret = err;
7988
7989                 dev_node = rb_next(dev_node);
7990         }
7991         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7992                             device_list) {
7993                 fprintf(stderr,
7994                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7995                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
7996                 if (!ret)
7997                         ret = 1;
7998         }
7999         return ret;
8000 }
8001
8002 static int add_root_item_to_list(struct list_head *head,
8003                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8004                                   u8 level, u8 drop_level,
8005                                   int level_size, struct btrfs_key *drop_key)
8006 {
8007
8008         struct root_item_record *ri_rec;
8009         ri_rec = malloc(sizeof(*ri_rec));
8010         if (!ri_rec)
8011                 return -ENOMEM;
8012         ri_rec->bytenr = bytenr;
8013         ri_rec->objectid = objectid;
8014         ri_rec->level = level;
8015         ri_rec->level_size = level_size;
8016         ri_rec->drop_level = drop_level;
8017         ri_rec->last_snapshot = last_snapshot;
8018         if (drop_key)
8019                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8020         list_add_tail(&ri_rec->list, head);
8021
8022         return 0;
8023 }
8024
8025 static void free_root_item_list(struct list_head *list)
8026 {
8027         struct root_item_record *ri_rec;
8028
8029         while (!list_empty(list)) {
8030                 ri_rec = list_first_entry(list, struct root_item_record,
8031                                           list);
8032                 list_del_init(&ri_rec->list);
8033                 free(ri_rec);
8034         }
8035 }
8036
8037 static int deal_root_from_list(struct list_head *list,
8038                                struct btrfs_root *root,
8039                                struct block_info *bits,
8040                                int bits_nr,
8041                                struct cache_tree *pending,
8042                                struct cache_tree *seen,
8043                                struct cache_tree *reada,
8044                                struct cache_tree *nodes,
8045                                struct cache_tree *extent_cache,
8046                                struct cache_tree *chunk_cache,
8047                                struct rb_root *dev_cache,
8048                                struct block_group_tree *block_group_cache,
8049                                struct device_extent_tree *dev_extent_cache)
8050 {
8051         int ret = 0;
8052         u64 last;
8053
8054         while (!list_empty(list)) {
8055                 struct root_item_record *rec;
8056                 struct extent_buffer *buf;
8057                 rec = list_entry(list->next,
8058                                  struct root_item_record, list);
8059                 last = 0;
8060                 buf = read_tree_block(root->fs_info->tree_root,
8061                                       rec->bytenr, rec->level_size, 0);
8062                 if (!extent_buffer_uptodate(buf)) {
8063                         free_extent_buffer(buf);
8064                         ret = -EIO;
8065                         break;
8066                 }
8067                 add_root_to_pending(buf, extent_cache, pending,
8068                                     seen, nodes, rec->objectid);
8069                 /*
8070                  * To rebuild extent tree, we need deal with snapshot
8071                  * one by one, otherwise we deal with node firstly which
8072                  * can maximize readahead.
8073                  */
8074                 while (1) {
8075                         ret = run_next_block(root, bits, bits_nr, &last,
8076                                              pending, seen, reada, nodes,
8077                                              extent_cache, chunk_cache,
8078                                              dev_cache, block_group_cache,
8079                                              dev_extent_cache, rec);
8080                         if (ret != 0)
8081                                 break;
8082                 }
8083                 free_extent_buffer(buf);
8084                 list_del(&rec->list);
8085                 free(rec);
8086                 if (ret < 0)
8087                         break;
8088         }
8089         while (ret >= 0) {
8090                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8091                                      reada, nodes, extent_cache, chunk_cache,
8092                                      dev_cache, block_group_cache,
8093                                      dev_extent_cache, NULL);
8094                 if (ret != 0) {
8095                         if (ret > 0)
8096                                 ret = 0;
8097                         break;
8098                 }
8099         }
8100         return ret;
8101 }
8102
8103 static int check_chunks_and_extents(struct btrfs_root *root)
8104 {
8105         struct rb_root dev_cache;
8106         struct cache_tree chunk_cache;
8107         struct block_group_tree block_group_cache;
8108         struct device_extent_tree dev_extent_cache;
8109         struct cache_tree extent_cache;
8110         struct cache_tree seen;
8111         struct cache_tree pending;
8112         struct cache_tree reada;
8113         struct cache_tree nodes;
8114         struct extent_io_tree excluded_extents;
8115         struct cache_tree corrupt_blocks;
8116         struct btrfs_path path;
8117         struct btrfs_key key;
8118         struct btrfs_key found_key;
8119         int ret, err = 0;
8120         struct block_info *bits;
8121         int bits_nr;
8122         struct extent_buffer *leaf;
8123         int slot;
8124         struct btrfs_root_item ri;
8125         struct list_head dropping_trees;
8126         struct list_head normal_trees;
8127         struct btrfs_root *root1;
8128         u64 objectid;
8129         u32 level_size;
8130         u8 level;
8131
8132         dev_cache = RB_ROOT;
8133         cache_tree_init(&chunk_cache);
8134         block_group_tree_init(&block_group_cache);
8135         device_extent_tree_init(&dev_extent_cache);
8136
8137         cache_tree_init(&extent_cache);
8138         cache_tree_init(&seen);
8139         cache_tree_init(&pending);
8140         cache_tree_init(&nodes);
8141         cache_tree_init(&reada);
8142         cache_tree_init(&corrupt_blocks);
8143         extent_io_tree_init(&excluded_extents);
8144         INIT_LIST_HEAD(&dropping_trees);
8145         INIT_LIST_HEAD(&normal_trees);
8146
8147         if (repair) {
8148                 root->fs_info->excluded_extents = &excluded_extents;
8149                 root->fs_info->fsck_extent_cache = &extent_cache;
8150                 root->fs_info->free_extent_hook = free_extent_hook;
8151                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8152         }
8153
8154         bits_nr = 1024;
8155         bits = malloc(bits_nr * sizeof(struct block_info));
8156         if (!bits) {
8157                 perror("malloc");
8158                 exit(1);
8159         }
8160
8161         if (ctx.progress_enabled) {
8162                 ctx.tp = TASK_EXTENTS;
8163                 task_start(ctx.info);
8164         }
8165
8166 again:
8167         root1 = root->fs_info->tree_root;
8168         level = btrfs_header_level(root1->node);
8169         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8170                                     root1->node->start, 0, level, 0,
8171                                     btrfs_level_size(root1, level), NULL);
8172         if (ret < 0)
8173                 goto out;
8174         root1 = root->fs_info->chunk_root;
8175         level = btrfs_header_level(root1->node);
8176         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8177                                     root1->node->start, 0, level, 0,
8178                                     btrfs_level_size(root1, level), NULL);
8179         if (ret < 0)
8180                 goto out;
8181         btrfs_init_path(&path);
8182         key.offset = 0;
8183         key.objectid = 0;
8184         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8185         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8186                                         &key, &path, 0, 0);
8187         if (ret < 0)
8188                 goto out;
8189         while(1) {
8190                 leaf = path.nodes[0];
8191                 slot = path.slots[0];
8192                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8193                         ret = btrfs_next_leaf(root, &path);
8194                         if (ret != 0)
8195                                 break;
8196                         leaf = path.nodes[0];
8197                         slot = path.slots[0];
8198                 }
8199                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8200                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8201                         unsigned long offset;
8202                         u64 last_snapshot;
8203
8204                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8205                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8206                         last_snapshot = btrfs_root_last_snapshot(&ri);
8207                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8208                                 level = btrfs_root_level(&ri);
8209                                 level_size = btrfs_level_size(root, level);
8210                                 ret = add_root_item_to_list(&normal_trees,
8211                                                 found_key.objectid,
8212                                                 btrfs_root_bytenr(&ri),
8213                                                 last_snapshot, level,
8214                                                 0, level_size, NULL);
8215                                 if (ret < 0)
8216                                         goto out;
8217                         } else {
8218                                 level = btrfs_root_level(&ri);
8219                                 level_size = btrfs_level_size(root, level);
8220                                 objectid = found_key.objectid;
8221                                 btrfs_disk_key_to_cpu(&found_key,
8222                                                       &ri.drop_progress);
8223                                 ret = add_root_item_to_list(&dropping_trees,
8224                                                 objectid,
8225                                                 btrfs_root_bytenr(&ri),
8226                                                 last_snapshot, level,
8227                                                 ri.drop_level,
8228                                                 level_size, &found_key);
8229                                 if (ret < 0)
8230                                         goto out;
8231                         }
8232                 }
8233                 path.slots[0]++;
8234         }
8235         btrfs_release_path(&path);
8236
8237         /*
8238          * check_block can return -EAGAIN if it fixes something, please keep
8239          * this in mind when dealing with return values from these functions, if
8240          * we get -EAGAIN we want to fall through and restart the loop.
8241          */
8242         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8243                                   &seen, &reada, &nodes, &extent_cache,
8244                                   &chunk_cache, &dev_cache, &block_group_cache,
8245                                   &dev_extent_cache);
8246         if (ret < 0) {
8247                 if (ret == -EAGAIN)
8248                         goto loop;
8249                 goto out;
8250         }
8251         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8252                                   &pending, &seen, &reada, &nodes,
8253                                   &extent_cache, &chunk_cache, &dev_cache,
8254                                   &block_group_cache, &dev_extent_cache);
8255         if (ret < 0) {
8256                 if (ret == -EAGAIN)
8257                         goto loop;
8258                 goto out;
8259         }
8260
8261         ret = check_chunks(&chunk_cache, &block_group_cache,
8262                            &dev_extent_cache, NULL, NULL, NULL, 0);
8263         if (ret) {
8264                 if (ret == -EAGAIN)
8265                         goto loop;
8266                 err = ret;
8267         }
8268
8269         ret = check_extent_refs(root, &extent_cache);
8270         if (ret < 0) {
8271                 if (ret == -EAGAIN)
8272                         goto loop;
8273                 goto out;
8274         }
8275
8276         ret = check_devices(&dev_cache, &dev_extent_cache);
8277         if (ret && err)
8278                 ret = err;
8279
8280 out:
8281         task_stop(ctx.info);
8282         if (repair) {
8283                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8284                 extent_io_tree_cleanup(&excluded_extents);
8285                 root->fs_info->fsck_extent_cache = NULL;
8286                 root->fs_info->free_extent_hook = NULL;
8287                 root->fs_info->corrupt_blocks = NULL;
8288                 root->fs_info->excluded_extents = NULL;
8289         }
8290         free(bits);
8291         free_chunk_cache_tree(&chunk_cache);
8292         free_device_cache_tree(&dev_cache);
8293         free_block_group_tree(&block_group_cache);
8294         free_device_extent_tree(&dev_extent_cache);
8295         free_extent_cache_tree(&seen);
8296         free_extent_cache_tree(&pending);
8297         free_extent_cache_tree(&reada);
8298         free_extent_cache_tree(&nodes);
8299         return ret;
8300 loop:
8301         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8302         free_extent_cache_tree(&seen);
8303         free_extent_cache_tree(&pending);
8304         free_extent_cache_tree(&reada);
8305         free_extent_cache_tree(&nodes);
8306         free_chunk_cache_tree(&chunk_cache);
8307         free_block_group_tree(&block_group_cache);
8308         free_device_cache_tree(&dev_cache);
8309         free_device_extent_tree(&dev_extent_cache);
8310         free_extent_record_cache(root->fs_info, &extent_cache);
8311         free_root_item_list(&normal_trees);
8312         free_root_item_list(&dropping_trees);
8313         extent_io_tree_cleanup(&excluded_extents);
8314         goto again;
8315 }
8316
8317 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8318                            struct btrfs_root *root, int overwrite)
8319 {
8320         struct extent_buffer *c;
8321         struct extent_buffer *old = root->node;
8322         int level;
8323         int ret;
8324         struct btrfs_disk_key disk_key = {0,0,0};
8325
8326         level = 0;
8327
8328         if (overwrite) {
8329                 c = old;
8330                 extent_buffer_get(c);
8331                 goto init;
8332         }
8333         c = btrfs_alloc_free_block(trans, root,
8334                                    btrfs_level_size(root, 0),
8335                                    root->root_key.objectid,
8336                                    &disk_key, level, 0, 0);
8337         if (IS_ERR(c)) {
8338                 c = old;
8339                 extent_buffer_get(c);
8340                 overwrite = 1;
8341         }
8342 init:
8343         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8344         btrfs_set_header_level(c, level);
8345         btrfs_set_header_bytenr(c, c->start);
8346         btrfs_set_header_generation(c, trans->transid);
8347         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8348         btrfs_set_header_owner(c, root->root_key.objectid);
8349
8350         write_extent_buffer(c, root->fs_info->fsid,
8351                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8352
8353         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8354                             btrfs_header_chunk_tree_uuid(c),
8355                             BTRFS_UUID_SIZE);
8356
8357         btrfs_mark_buffer_dirty(c);
8358         /*
8359          * this case can happen in the following case:
8360          *
8361          * 1.overwrite previous root.
8362          *
8363          * 2.reinit reloc data root, this is because we skip pin
8364          * down reloc data tree before which means we can allocate
8365          * same block bytenr here.
8366          */
8367         if (old->start == c->start) {
8368                 btrfs_set_root_generation(&root->root_item,
8369                                           trans->transid);
8370                 root->root_item.level = btrfs_header_level(root->node);
8371                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8372                                         &root->root_key, &root->root_item);
8373                 if (ret) {
8374                         free_extent_buffer(c);
8375                         return ret;
8376                 }
8377         }
8378         free_extent_buffer(old);
8379         root->node = c;
8380         add_root_to_dirty_list(root);
8381         return 0;
8382 }
8383
8384 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8385                                 struct extent_buffer *eb, int tree_root)
8386 {
8387         struct extent_buffer *tmp;
8388         struct btrfs_root_item *ri;
8389         struct btrfs_key key;
8390         u64 bytenr;
8391         u32 leafsize;
8392         int level = btrfs_header_level(eb);
8393         int nritems;
8394         int ret;
8395         int i;
8396
8397         /*
8398          * If we have pinned this block before, don't pin it again.
8399          * This can not only avoid forever loop with broken filesystem
8400          * but also give us some speedups.
8401          */
8402         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8403                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8404                 return 0;
8405
8406         btrfs_pin_extent(fs_info, eb->start, eb->len);
8407
8408         leafsize = btrfs_super_leafsize(fs_info->super_copy);
8409         nritems = btrfs_header_nritems(eb);
8410         for (i = 0; i < nritems; i++) {
8411                 if (level == 0) {
8412                         btrfs_item_key_to_cpu(eb, &key, i);
8413                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8414                                 continue;
8415                         /* Skip the extent root and reloc roots */
8416                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8417                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8418                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8419                                 continue;
8420                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8421                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8422
8423                         /*
8424                          * If at any point we start needing the real root we
8425                          * will have to build a stump root for the root we are
8426                          * in, but for now this doesn't actually use the root so
8427                          * just pass in extent_root.
8428                          */
8429                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8430                                               leafsize, 0);
8431                         if (!extent_buffer_uptodate(tmp)) {
8432                                 fprintf(stderr, "Error reading root block\n");
8433                                 return -EIO;
8434                         }
8435                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8436                         free_extent_buffer(tmp);
8437                         if (ret)
8438                                 return ret;
8439                 } else {
8440                         bytenr = btrfs_node_blockptr(eb, i);
8441
8442                         /* If we aren't the tree root don't read the block */
8443                         if (level == 1 && !tree_root) {
8444                                 btrfs_pin_extent(fs_info, bytenr, leafsize);
8445                                 continue;
8446                         }
8447
8448                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8449                                               leafsize, 0);
8450                         if (!extent_buffer_uptodate(tmp)) {
8451                                 fprintf(stderr, "Error reading tree block\n");
8452                                 return -EIO;
8453                         }
8454                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8455                         free_extent_buffer(tmp);
8456                         if (ret)
8457                                 return ret;
8458                 }
8459         }
8460
8461         return 0;
8462 }
8463
8464 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8465 {
8466         int ret;
8467
8468         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8469         if (ret)
8470                 return ret;
8471
8472         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8473 }
8474
8475 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8476 {
8477         struct btrfs_block_group_cache *cache;
8478         struct btrfs_path *path;
8479         struct extent_buffer *leaf;
8480         struct btrfs_chunk *chunk;
8481         struct btrfs_key key;
8482         int ret;
8483         u64 start;
8484
8485         path = btrfs_alloc_path();
8486         if (!path)
8487                 return -ENOMEM;
8488
8489         key.objectid = 0;
8490         key.type = BTRFS_CHUNK_ITEM_KEY;
8491         key.offset = 0;
8492
8493         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8494         if (ret < 0) {
8495                 btrfs_free_path(path);
8496                 return ret;
8497         }
8498
8499         /*
8500          * We do this in case the block groups were screwed up and had alloc
8501          * bits that aren't actually set on the chunks.  This happens with
8502          * restored images every time and could happen in real life I guess.
8503          */
8504         fs_info->avail_data_alloc_bits = 0;
8505         fs_info->avail_metadata_alloc_bits = 0;
8506         fs_info->avail_system_alloc_bits = 0;
8507
8508         /* First we need to create the in-memory block groups */
8509         while (1) {
8510                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8511                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
8512                         if (ret < 0) {
8513                                 btrfs_free_path(path);
8514                                 return ret;
8515                         }
8516                         if (ret) {
8517                                 ret = 0;
8518                                 break;
8519                         }
8520                 }
8521                 leaf = path->nodes[0];
8522                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8523                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8524                         path->slots[0]++;
8525                         continue;
8526                 }
8527
8528                 chunk = btrfs_item_ptr(leaf, path->slots[0],
8529                                        struct btrfs_chunk);
8530                 btrfs_add_block_group(fs_info, 0,
8531                                       btrfs_chunk_type(leaf, chunk),
8532                                       key.objectid, key.offset,
8533                                       btrfs_chunk_length(leaf, chunk));
8534                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8535                                  key.offset + btrfs_chunk_length(leaf, chunk),
8536                                  GFP_NOFS);
8537                 path->slots[0]++;
8538         }
8539         start = 0;
8540         while (1) {
8541                 cache = btrfs_lookup_first_block_group(fs_info, start);
8542                 if (!cache)
8543                         break;
8544                 cache->cached = 1;
8545                 start = cache->key.objectid + cache->key.offset;
8546         }
8547
8548         btrfs_free_path(path);
8549         return 0;
8550 }
8551
8552 static int reset_balance(struct btrfs_trans_handle *trans,
8553                          struct btrfs_fs_info *fs_info)
8554 {
8555         struct btrfs_root *root = fs_info->tree_root;
8556         struct btrfs_path *path;
8557         struct extent_buffer *leaf;
8558         struct btrfs_key key;
8559         int del_slot, del_nr = 0;
8560         int ret;
8561         int found = 0;
8562
8563         path = btrfs_alloc_path();
8564         if (!path)
8565                 return -ENOMEM;
8566
8567         key.objectid = BTRFS_BALANCE_OBJECTID;
8568         key.type = BTRFS_BALANCE_ITEM_KEY;
8569         key.offset = 0;
8570
8571         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8572         if (ret) {
8573                 if (ret > 0)
8574                         ret = 0;
8575                 if (!ret)
8576                         goto reinit_data_reloc;
8577                 else
8578                         goto out;
8579         }
8580
8581         ret = btrfs_del_item(trans, root, path);
8582         if (ret)
8583                 goto out;
8584         btrfs_release_path(path);
8585
8586         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8587         key.type = BTRFS_ROOT_ITEM_KEY;
8588         key.offset = 0;
8589
8590         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8591         if (ret < 0)
8592                 goto out;
8593         while (1) {
8594                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8595                         if (!found)
8596                                 break;
8597
8598                         if (del_nr) {
8599                                 ret = btrfs_del_items(trans, root, path,
8600                                                       del_slot, del_nr);
8601                                 del_nr = 0;
8602                                 if (ret)
8603                                         goto out;
8604                         }
8605                         key.offset++;
8606                         btrfs_release_path(path);
8607
8608                         found = 0;
8609                         ret = btrfs_search_slot(trans, root, &key, path,
8610                                                 -1, 1);
8611                         if (ret < 0)
8612                                 goto out;
8613                         continue;
8614                 }
8615                 found = 1;
8616                 leaf = path->nodes[0];
8617                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8618                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8619                         break;
8620                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8621                         path->slots[0]++;
8622                         continue;
8623                 }
8624                 if (!del_nr) {
8625                         del_slot = path->slots[0];
8626                         del_nr = 1;
8627                 } else {
8628                         del_nr++;
8629                 }
8630                 path->slots[0]++;
8631         }
8632
8633         if (del_nr) {
8634                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8635                 if (ret)
8636                         goto out;
8637         }
8638         btrfs_release_path(path);
8639
8640 reinit_data_reloc:
8641         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8642         key.type = BTRFS_ROOT_ITEM_KEY;
8643         key.offset = (u64)-1;
8644         root = btrfs_read_fs_root(fs_info, &key);
8645         if (IS_ERR(root)) {
8646                 fprintf(stderr, "Error reading data reloc tree\n");
8647                 ret = PTR_ERR(root);
8648                 goto out;
8649         }
8650         record_root_in_trans(trans, root);
8651         ret = btrfs_fsck_reinit_root(trans, root, 0);
8652         if (ret)
8653                 goto out;
8654         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8655 out:
8656         btrfs_free_path(path);
8657         return ret;
8658 }
8659
8660 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8661                               struct btrfs_fs_info *fs_info)
8662 {
8663         u64 start = 0;
8664         int ret;
8665
8666         /*
8667          * The only reason we don't do this is because right now we're just
8668          * walking the trees we find and pinning down their bytes, we don't look
8669          * at any of the leaves.  In order to do mixed groups we'd have to check
8670          * the leaves of any fs roots and pin down the bytes for any file
8671          * extents we find.  Not hard but why do it if we don't have to?
8672          */
8673         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8674                 fprintf(stderr, "We don't support re-initing the extent tree "
8675                         "for mixed block groups yet, please notify a btrfs "
8676                         "developer you want to do this so they can add this "
8677                         "functionality.\n");
8678                 return -EINVAL;
8679         }
8680
8681         /*
8682          * first we need to walk all of the trees except the extent tree and pin
8683          * down the bytes that are in use so we don't overwrite any existing
8684          * metadata.
8685          */
8686         ret = pin_metadata_blocks(fs_info);
8687         if (ret) {
8688                 fprintf(stderr, "error pinning down used bytes\n");
8689                 return ret;
8690         }
8691
8692         /*
8693          * Need to drop all the block groups since we're going to recreate all
8694          * of them again.
8695          */
8696         btrfs_free_block_groups(fs_info);
8697         ret = reset_block_groups(fs_info);
8698         if (ret) {
8699                 fprintf(stderr, "error resetting the block groups\n");
8700                 return ret;
8701         }
8702
8703         /* Ok we can allocate now, reinit the extent root */
8704         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8705         if (ret) {
8706                 fprintf(stderr, "extent root initialization failed\n");
8707                 /*
8708                  * When the transaction code is updated we should end the
8709                  * transaction, but for now progs only knows about commit so
8710                  * just return an error.
8711                  */
8712                 return ret;
8713         }
8714
8715         /*
8716          * Now we have all the in-memory block groups setup so we can make
8717          * allocations properly, and the metadata we care about is safe since we
8718          * pinned all of it above.
8719          */
8720         while (1) {
8721                 struct btrfs_block_group_cache *cache;
8722
8723                 cache = btrfs_lookup_first_block_group(fs_info, start);
8724                 if (!cache)
8725                         break;
8726                 start = cache->key.objectid + cache->key.offset;
8727                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8728                                         &cache->key, &cache->item,
8729                                         sizeof(cache->item));
8730                 if (ret) {
8731                         fprintf(stderr, "Error adding block group\n");
8732                         return ret;
8733                 }
8734                 btrfs_extent_post_op(trans, fs_info->extent_root);
8735         }
8736
8737         ret = reset_balance(trans, fs_info);
8738         if (ret)
8739                 fprintf(stderr, "error reseting the pending balance\n");
8740
8741         return ret;
8742 }
8743
8744 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8745 {
8746         struct btrfs_path *path;
8747         struct btrfs_trans_handle *trans;
8748         struct btrfs_key key;
8749         int ret;
8750
8751         printf("Recowing metadata block %llu\n", eb->start);
8752         key.objectid = btrfs_header_owner(eb);
8753         key.type = BTRFS_ROOT_ITEM_KEY;
8754         key.offset = (u64)-1;
8755
8756         root = btrfs_read_fs_root(root->fs_info, &key);
8757         if (IS_ERR(root)) {
8758                 fprintf(stderr, "Couldn't find owner root %llu\n",
8759                         key.objectid);
8760                 return PTR_ERR(root);
8761         }
8762
8763         path = btrfs_alloc_path();
8764         if (!path)
8765                 return -ENOMEM;
8766
8767         trans = btrfs_start_transaction(root, 1);
8768         if (IS_ERR(trans)) {
8769                 btrfs_free_path(path);
8770                 return PTR_ERR(trans);
8771         }
8772
8773         path->lowest_level = btrfs_header_level(eb);
8774         if (path->lowest_level)
8775                 btrfs_node_key_to_cpu(eb, &key, 0);
8776         else
8777                 btrfs_item_key_to_cpu(eb, &key, 0);
8778
8779         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8780         btrfs_commit_transaction(trans, root);
8781         btrfs_free_path(path);
8782         return ret;
8783 }
8784
8785 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8786 {
8787         struct btrfs_path *path;
8788         struct btrfs_trans_handle *trans;
8789         struct btrfs_key key;
8790         int ret;
8791
8792         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8793                bad->key.type, bad->key.offset);
8794         key.objectid = bad->root_id;
8795         key.type = BTRFS_ROOT_ITEM_KEY;
8796         key.offset = (u64)-1;
8797
8798         root = btrfs_read_fs_root(root->fs_info, &key);
8799         if (IS_ERR(root)) {
8800                 fprintf(stderr, "Couldn't find owner root %llu\n",
8801                         key.objectid);
8802                 return PTR_ERR(root);
8803         }
8804
8805         path = btrfs_alloc_path();
8806         if (!path)
8807                 return -ENOMEM;
8808
8809         trans = btrfs_start_transaction(root, 1);
8810         if (IS_ERR(trans)) {
8811                 btrfs_free_path(path);
8812                 return PTR_ERR(trans);
8813         }
8814
8815         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8816         if (ret) {
8817                 if (ret > 0)
8818                         ret = 0;
8819                 goto out;
8820         }
8821         ret = btrfs_del_item(trans, root, path);
8822 out:
8823         btrfs_commit_transaction(trans, root);
8824         btrfs_free_path(path);
8825         return ret;
8826 }
8827
8828 static int zero_log_tree(struct btrfs_root *root)
8829 {
8830         struct btrfs_trans_handle *trans;
8831         int ret;
8832
8833         trans = btrfs_start_transaction(root, 1);
8834         if (IS_ERR(trans)) {
8835                 ret = PTR_ERR(trans);
8836                 return ret;
8837         }
8838         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8839         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8840         ret = btrfs_commit_transaction(trans, root);
8841         return ret;
8842 }
8843
8844 static int populate_csum(struct btrfs_trans_handle *trans,
8845                          struct btrfs_root *csum_root, char *buf, u64 start,
8846                          u64 len)
8847 {
8848         u64 offset = 0;
8849         u64 sectorsize;
8850         int ret = 0;
8851
8852         while (offset < len) {
8853                 sectorsize = csum_root->sectorsize;
8854                 ret = read_extent_data(csum_root, buf, start + offset,
8855                                        &sectorsize, 0);
8856                 if (ret)
8857                         break;
8858                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8859                                             start + offset, buf, sectorsize);
8860                 if (ret)
8861                         break;
8862                 offset += sectorsize;
8863         }
8864         return ret;
8865 }
8866
8867 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8868                                       struct btrfs_root *csum_root,
8869                                       struct btrfs_root *cur_root)
8870 {
8871         struct btrfs_path *path;
8872         struct btrfs_key key;
8873         struct extent_buffer *node;
8874         struct btrfs_file_extent_item *fi;
8875         char *buf = NULL;
8876         u64 start = 0;
8877         u64 len = 0;
8878         int slot = 0;
8879         int ret = 0;
8880
8881         path = btrfs_alloc_path();
8882         if (!path)
8883                 return -ENOMEM;
8884         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8885         if (!buf) {
8886                 ret = -ENOMEM;
8887                 goto out;
8888         }
8889
8890         key.objectid = 0;
8891         key.offset = 0;
8892         key.type = 0;
8893
8894         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8895         if (ret < 0)
8896                 goto out;
8897         /* Iterate all regular file extents and fill its csum */
8898         while (1) {
8899                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8900
8901                 if (key.type != BTRFS_EXTENT_DATA_KEY)
8902                         goto next;
8903                 node = path->nodes[0];
8904                 slot = path->slots[0];
8905                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8906                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8907                         goto next;
8908                 start = btrfs_file_extent_disk_bytenr(node, fi);
8909                 len = btrfs_file_extent_disk_num_bytes(node, fi);
8910
8911                 ret = populate_csum(trans, csum_root, buf, start, len);
8912                 if (ret == -EEXIST)
8913                         ret = 0;
8914                 if (ret < 0)
8915                         goto out;
8916 next:
8917                 /*
8918                  * TODO: if next leaf is corrupted, jump to nearest next valid
8919                  * leaf.
8920                  */
8921                 ret = btrfs_next_item(cur_root, path);
8922                 if (ret < 0)
8923                         goto out;
8924                 if (ret > 0) {
8925                         ret = 0;
8926                         goto out;
8927                 }
8928         }
8929
8930 out:
8931         btrfs_free_path(path);
8932         free(buf);
8933         return ret;
8934 }
8935
8936 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8937                                   struct btrfs_root *csum_root)
8938 {
8939         struct btrfs_fs_info *fs_info = csum_root->fs_info;
8940         struct btrfs_path *path;
8941         struct btrfs_root *tree_root = fs_info->tree_root;
8942         struct btrfs_root *cur_root;
8943         struct extent_buffer *node;
8944         struct btrfs_key key;
8945         int slot = 0;
8946         int ret = 0;
8947
8948         path = btrfs_alloc_path();
8949         if (!path)
8950                 return -ENOMEM;
8951
8952         key.objectid = BTRFS_FS_TREE_OBJECTID;
8953         key.offset = 0;
8954         key.type = BTRFS_ROOT_ITEM_KEY;
8955
8956         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
8957         if (ret < 0)
8958                 goto out;
8959         if (ret > 0) {
8960                 ret = -ENOENT;
8961                 goto out;
8962         }
8963
8964         while (1) {
8965                 node = path->nodes[0];
8966                 slot = path->slots[0];
8967                 btrfs_item_key_to_cpu(node, &key, slot);
8968                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8969                         goto out;
8970                 if (key.type != BTRFS_ROOT_ITEM_KEY)
8971                         goto next;
8972                 if (!is_fstree(key.objectid))
8973                         goto next;
8974                 key.offset = (u64)-1;
8975
8976                 cur_root = btrfs_read_fs_root(fs_info, &key);
8977                 if (IS_ERR(cur_root) || !cur_root) {
8978                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8979                                 key.objectid);
8980                         goto out;
8981                 }
8982                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8983                                 cur_root);
8984                 if (ret < 0)
8985                         goto out;
8986 next:
8987                 ret = btrfs_next_item(tree_root, path);
8988                 if (ret > 0) {
8989                         ret = 0;
8990                         goto out;
8991                 }
8992                 if (ret < 0)
8993                         goto out;
8994         }
8995
8996 out:
8997         btrfs_free_path(path);
8998         return ret;
8999 }
9000
9001 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
9002                                       struct btrfs_root *csum_root)
9003 {
9004         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
9005         struct btrfs_path *path;
9006         struct btrfs_extent_item *ei;
9007         struct extent_buffer *leaf;
9008         char *buf;
9009         struct btrfs_key key;
9010         int ret;
9011
9012         path = btrfs_alloc_path();
9013         if (!path)
9014                 return -ENOMEM;
9015
9016         key.objectid = 0;
9017         key.type = BTRFS_EXTENT_ITEM_KEY;
9018         key.offset = 0;
9019
9020         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9021         if (ret < 0) {
9022                 btrfs_free_path(path);
9023                 return ret;
9024         }
9025
9026         buf = malloc(csum_root->sectorsize);
9027         if (!buf) {
9028                 btrfs_free_path(path);
9029                 return -ENOMEM;
9030         }
9031
9032         while (1) {
9033                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9034                         ret = btrfs_next_leaf(extent_root, path);
9035                         if (ret < 0)
9036                                 break;
9037                         if (ret) {
9038                                 ret = 0;
9039                                 break;
9040                         }
9041                 }
9042                 leaf = path->nodes[0];
9043
9044                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9045                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9046                         path->slots[0]++;
9047                         continue;
9048                 }
9049
9050                 ei = btrfs_item_ptr(leaf, path->slots[0],
9051                                     struct btrfs_extent_item);
9052                 if (!(btrfs_extent_flags(leaf, ei) &
9053                       BTRFS_EXTENT_FLAG_DATA)) {
9054                         path->slots[0]++;
9055                         continue;
9056                 }
9057
9058                 ret = populate_csum(trans, csum_root, buf, key.objectid,
9059                                     key.offset);
9060                 if (ret)
9061                         break;
9062                 path->slots[0]++;
9063         }
9064
9065         btrfs_free_path(path);
9066         free(buf);
9067         return ret;
9068 }
9069
9070 /*
9071  * Recalculate the csum and put it into the csum tree.
9072  *
9073  * Extent tree init will wipe out all the extent info, so in that case, we
9074  * can't depend on extent tree, but use fs tree.  If search_fs_tree is set, we
9075  * will use fs/subvol trees to init the csum tree.
9076  */
9077 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9078                           struct btrfs_root *csum_root,
9079                           int search_fs_tree)
9080 {
9081         if (search_fs_tree)
9082                 return fill_csum_tree_from_fs(trans, csum_root);
9083         else
9084                 return fill_csum_tree_from_extent(trans, csum_root);
9085 }
9086
9087 struct root_item_info {
9088         /* level of the root */
9089         u8 level;
9090         /* number of nodes at this level, must be 1 for a root */
9091         int node_count;
9092         u64 bytenr;
9093         u64 gen;
9094         struct cache_extent cache_extent;
9095 };
9096
9097 static struct cache_tree *roots_info_cache = NULL;
9098
9099 static void free_roots_info_cache(void)
9100 {
9101         if (!roots_info_cache)
9102                 return;
9103
9104         while (!cache_tree_empty(roots_info_cache)) {
9105                 struct cache_extent *entry;
9106                 struct root_item_info *rii;
9107
9108                 entry = first_cache_extent(roots_info_cache);
9109                 if (!entry)
9110                         break;
9111                 remove_cache_extent(roots_info_cache, entry);
9112                 rii = container_of(entry, struct root_item_info, cache_extent);
9113                 free(rii);
9114         }
9115
9116         free(roots_info_cache);
9117         roots_info_cache = NULL;
9118 }
9119
9120 static int build_roots_info_cache(struct btrfs_fs_info *info)
9121 {
9122         int ret = 0;
9123         struct btrfs_key key;
9124         struct extent_buffer *leaf;
9125         struct btrfs_path *path;
9126
9127         if (!roots_info_cache) {
9128                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9129                 if (!roots_info_cache)
9130                         return -ENOMEM;
9131                 cache_tree_init(roots_info_cache);
9132         }
9133
9134         path = btrfs_alloc_path();
9135         if (!path)
9136                 return -ENOMEM;
9137
9138         key.objectid = 0;
9139         key.type = BTRFS_EXTENT_ITEM_KEY;
9140         key.offset = 0;
9141
9142         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9143         if (ret < 0)
9144                 goto out;
9145         leaf = path->nodes[0];
9146
9147         while (1) {
9148                 struct btrfs_key found_key;
9149                 struct btrfs_extent_item *ei;
9150                 struct btrfs_extent_inline_ref *iref;
9151                 int slot = path->slots[0];
9152                 int type;
9153                 u64 flags;
9154                 u64 root_id;
9155                 u8 level;
9156                 struct cache_extent *entry;
9157                 struct root_item_info *rii;
9158
9159                 if (slot >= btrfs_header_nritems(leaf)) {
9160                         ret = btrfs_next_leaf(info->extent_root, path);
9161                         if (ret < 0) {
9162                                 break;
9163                         } else if (ret) {
9164                                 ret = 0;
9165                                 break;
9166                         }
9167                         leaf = path->nodes[0];
9168                         slot = path->slots[0];
9169                 }
9170
9171                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9172
9173                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9174                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9175                         goto next;
9176
9177                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9178                 flags = btrfs_extent_flags(leaf, ei);
9179
9180                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9181                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9182                         goto next;
9183
9184                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9185                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9186                         level = found_key.offset;
9187                 } else {
9188                         struct btrfs_tree_block_info *info;
9189
9190                         info = (struct btrfs_tree_block_info *)(ei + 1);
9191                         iref = (struct btrfs_extent_inline_ref *)(info + 1);
9192                         level = btrfs_tree_block_level(leaf, info);
9193                 }
9194
9195                 /*
9196                  * For a root extent, it must be of the following type and the
9197                  * first (and only one) iref in the item.
9198                  */
9199                 type = btrfs_extent_inline_ref_type(leaf, iref);
9200                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9201                         goto next;
9202
9203                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9204                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9205                 if (!entry) {
9206                         rii = malloc(sizeof(struct root_item_info));
9207                         if (!rii) {
9208                                 ret = -ENOMEM;
9209                                 goto out;
9210                         }
9211                         rii->cache_extent.start = root_id;
9212                         rii->cache_extent.size = 1;
9213                         rii->level = (u8)-1;
9214                         entry = &rii->cache_extent;
9215                         ret = insert_cache_extent(roots_info_cache, entry);
9216                         ASSERT(ret == 0);
9217                 } else {
9218                         rii = container_of(entry, struct root_item_info,
9219                                            cache_extent);
9220                 }
9221
9222                 ASSERT(rii->cache_extent.start == root_id);
9223                 ASSERT(rii->cache_extent.size == 1);
9224
9225                 if (level > rii->level || rii->level == (u8)-1) {
9226                         rii->level = level;
9227                         rii->bytenr = found_key.objectid;
9228                         rii->gen = btrfs_extent_generation(leaf, ei);
9229                         rii->node_count = 1;
9230                 } else if (level == rii->level) {
9231                         rii->node_count++;
9232                 }
9233 next:
9234                 path->slots[0]++;
9235         }
9236
9237 out:
9238         btrfs_free_path(path);
9239
9240         return ret;
9241 }
9242
9243 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9244                                   struct btrfs_path *path,
9245                                   const struct btrfs_key *root_key,
9246                                   const int read_only_mode)
9247 {
9248         const u64 root_id = root_key->objectid;
9249         struct cache_extent *entry;
9250         struct root_item_info *rii;
9251         struct btrfs_root_item ri;
9252         unsigned long offset;
9253
9254         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9255         if (!entry) {
9256                 fprintf(stderr,
9257                         "Error: could not find extent items for root %llu\n",
9258                         root_key->objectid);
9259                 return -ENOENT;
9260         }
9261
9262         rii = container_of(entry, struct root_item_info, cache_extent);
9263         ASSERT(rii->cache_extent.start == root_id);
9264         ASSERT(rii->cache_extent.size == 1);
9265
9266         if (rii->node_count != 1) {
9267                 fprintf(stderr,
9268                         "Error: could not find btree root extent for root %llu\n",
9269                         root_id);
9270                 return -ENOENT;
9271         }
9272
9273         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9274         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9275
9276         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9277             btrfs_root_level(&ri) != rii->level ||
9278             btrfs_root_generation(&ri) != rii->gen) {
9279
9280                 /*
9281                  * If we're in repair mode but our caller told us to not update
9282                  * the root item, i.e. just check if it needs to be updated, don't
9283                  * print this message, since the caller will call us again shortly
9284                  * for the same root item without read only mode (the caller will
9285                  * open a transaction first).
9286                  */
9287                 if (!(read_only_mode && repair))
9288                         fprintf(stderr,
9289                                 "%sroot item for root %llu,"
9290                                 " current bytenr %llu, current gen %llu, current level %u,"
9291                                 " new bytenr %llu, new gen %llu, new level %u\n",
9292                                 (read_only_mode ? "" : "fixing "),
9293                                 root_id,
9294                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9295                                 btrfs_root_level(&ri),
9296                                 rii->bytenr, rii->gen, rii->level);
9297
9298                 if (btrfs_root_generation(&ri) > rii->gen) {
9299                         fprintf(stderr,
9300                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9301                                 root_id, btrfs_root_generation(&ri), rii->gen);
9302                         return -EINVAL;
9303                 }
9304
9305                 if (!read_only_mode) {
9306                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9307                         btrfs_set_root_level(&ri, rii->level);
9308                         btrfs_set_root_generation(&ri, rii->gen);
9309                         write_extent_buffer(path->nodes[0], &ri,
9310                                             offset, sizeof(ri));
9311                 }
9312
9313                 return 1;
9314         }
9315
9316         return 0;
9317 }
9318
9319 /*
9320  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9321  * caused read-only snapshots to be corrupted if they were created at a moment
9322  * when the source subvolume/snapshot had orphan items. The issue was that the
9323  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9324  * node instead of the post orphan cleanup root node.
9325  * So this function, and its callees, just detects and fixes those cases. Even
9326  * though the regression was for read-only snapshots, this function applies to
9327  * any snapshot/subvolume root.
9328  * This must be run before any other repair code - not doing it so, makes other
9329  * repair code delete or modify backrefs in the extent tree for example, which
9330  * will result in an inconsistent fs after repairing the root items.
9331  */
9332 static int repair_root_items(struct btrfs_fs_info *info)
9333 {
9334         struct btrfs_path *path = NULL;
9335         struct btrfs_key key;
9336         struct extent_buffer *leaf;
9337         struct btrfs_trans_handle *trans = NULL;
9338         int ret = 0;
9339         int bad_roots = 0;
9340         int need_trans = 0;
9341
9342         ret = build_roots_info_cache(info);
9343         if (ret)
9344                 goto out;
9345
9346         path = btrfs_alloc_path();
9347         if (!path) {
9348                 ret = -ENOMEM;
9349                 goto out;
9350         }
9351
9352         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9353         key.type = BTRFS_ROOT_ITEM_KEY;
9354         key.offset = 0;
9355
9356 again:
9357         /*
9358          * Avoid opening and committing transactions if a leaf doesn't have
9359          * any root items that need to be fixed, so that we avoid rotating
9360          * backup roots unnecessarily.
9361          */
9362         if (need_trans) {
9363                 trans = btrfs_start_transaction(info->tree_root, 1);
9364                 if (IS_ERR(trans)) {
9365                         ret = PTR_ERR(trans);
9366                         goto out;
9367                 }
9368         }
9369
9370         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9371                                 0, trans ? 1 : 0);
9372         if (ret < 0)
9373                 goto out;
9374         leaf = path->nodes[0];
9375
9376         while (1) {
9377                 struct btrfs_key found_key;
9378
9379                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9380                         int no_more_keys = find_next_key(path, &key);
9381
9382                         btrfs_release_path(path);
9383                         if (trans) {
9384                                 ret = btrfs_commit_transaction(trans,
9385                                                                info->tree_root);
9386                                 trans = NULL;
9387                                 if (ret < 0)
9388                                         goto out;
9389                         }
9390                         need_trans = 0;
9391                         if (no_more_keys)
9392                                 break;
9393                         goto again;
9394                 }
9395
9396                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9397
9398                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9399                         goto next;
9400                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9401                         goto next;
9402
9403                 ret = maybe_repair_root_item(info, path, &found_key,
9404                                              trans ? 0 : 1);
9405                 if (ret < 0)
9406                         goto out;
9407                 if (ret) {
9408                         if (!trans && repair) {
9409                                 need_trans = 1;
9410                                 key = found_key;
9411                                 btrfs_release_path(path);
9412                                 goto again;
9413                         }
9414                         bad_roots++;
9415                 }
9416 next:
9417                 path->slots[0]++;
9418         }
9419         ret = 0;
9420 out:
9421         free_roots_info_cache();
9422         btrfs_free_path(path);
9423         if (trans)
9424                 btrfs_commit_transaction(trans, info->tree_root);
9425         if (ret < 0)
9426                 return ret;
9427
9428         return bad_roots;
9429 }
9430
9431 const char * const cmd_check_usage[] = {
9432         "btrfs check [options] <device>",
9433         "Check structural inegrity of a filesystem (unmounted).",
9434         "Check structural inegrity of an unmounted filesystem. Verify internal",
9435         "trees' consistency and item connectivity. In the repair mode try to",
9436         "fix the problems found.",
9437         "WARNING: the repair mode is considered dangerous",
9438         "",
9439         "-s|--super <superblock>     use this superblock copy",
9440         "-b|--backup                 use the backup root copy",
9441         "--repair                    try to repair the filesystem",
9442         "--readonly                  run in read-only mode (default)",
9443         "--init-csum-tree            create a new CRC tree",
9444         "--init-extent-tree          create a new extent tree",
9445         "--check-data-csum           verify checkums of data blocks",
9446         "-Q|--qgroup-report           print a report on qgroup consistency",
9447         "-E|--subvol-extents <subvolid>",
9448         "                            print subvolume extents and sharing state",
9449         "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
9450         "-p|--progress               indicate progress",
9451         NULL
9452 };
9453
9454 int cmd_check(int argc, char **argv)
9455 {
9456         struct cache_tree root_cache;
9457         struct btrfs_root *root;
9458         struct btrfs_fs_info *info;
9459         u64 bytenr = 0;
9460         u64 subvolid = 0;
9461         u64 tree_root_bytenr = 0;
9462         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9463         int ret;
9464         u64 num;
9465         int init_csum_tree = 0;
9466         int readonly = 0;
9467         int qgroup_report = 0;
9468         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9469
9470         while(1) {
9471                 int c;
9472                 enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT,
9473                         OPT_CHECK_CSUM, OPT_READONLY };
9474                 static const struct option long_options[] = {
9475                         { "super", required_argument, NULL, 's' },
9476                         { "repair", no_argument, NULL, OPT_REPAIR },
9477                         { "readonly", no_argument, NULL, OPT_READONLY },
9478                         { "init-csum-tree", no_argument, NULL, OPT_INIT_CSUM },
9479                         { "init-extent-tree", no_argument, NULL, OPT_INIT_EXTENT },
9480                         { "check-data-csum", no_argument, NULL, OPT_CHECK_CSUM },
9481                         { "backup", no_argument, NULL, 'b' },
9482                         { "subvol-extents", required_argument, NULL, 'E' },
9483                         { "qgroup-report", no_argument, NULL, 'Q' },
9484                         { "tree-root", required_argument, NULL, 'r' },
9485                         { "progress", no_argument, NULL, 'p' },
9486                         { NULL, 0, NULL, 0}
9487                 };
9488
9489                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9490                 if (c < 0)
9491                         break;
9492                 switch(c) {
9493                         case 'a': /* ignored */ break;
9494                         case 'b':
9495                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9496                                 break;
9497                         case 's':
9498                                 num = arg_strtou64(optarg);
9499                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9500                                         fprintf(stderr,
9501                                                 "ERROR: super mirror should be less than: %d\n",
9502                                                 BTRFS_SUPER_MIRROR_MAX);
9503                                         exit(1);
9504                                 }
9505                                 bytenr = btrfs_sb_offset(((int)num));
9506                                 printf("using SB copy %llu, bytenr %llu\n", num,
9507                                        (unsigned long long)bytenr);
9508                                 break;
9509                         case 'Q':
9510                                 qgroup_report = 1;
9511                                 break;
9512                         case 'E':
9513                                 subvolid = arg_strtou64(optarg);
9514                                 break;
9515                         case 'r':
9516                                 tree_root_bytenr = arg_strtou64(optarg);
9517                                 break;
9518                         case 'p':
9519                                 ctx.progress_enabled = true;
9520                                 break;
9521                         case '?':
9522                         case 'h':
9523                                 usage(cmd_check_usage);
9524                         case OPT_REPAIR:
9525                                 printf("enabling repair mode\n");
9526                                 repair = 1;
9527                                 ctree_flags |= OPEN_CTREE_WRITES;
9528                                 break;
9529                         case OPT_READONLY:
9530                                 readonly = 1;
9531                                 break;
9532                         case OPT_INIT_CSUM:
9533                                 printf("Creating a new CRC tree\n");
9534                                 init_csum_tree = 1;
9535                                 repair = 1;
9536                                 ctree_flags |= OPEN_CTREE_WRITES;
9537                                 break;
9538                         case OPT_INIT_EXTENT:
9539                                 init_extent_tree = 1;
9540                                 ctree_flags |= (OPEN_CTREE_WRITES |
9541                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9542                                 repair = 1;
9543                                 break;
9544                         case OPT_CHECK_CSUM:
9545                                 check_data_csum = 1;
9546                                 break;
9547                 }
9548         }
9549         argc = argc - optind;
9550
9551         if (check_argc_exact(argc, 1))
9552                 usage(cmd_check_usage);
9553
9554         if (ctx.progress_enabled) {
9555                 ctx.tp = TASK_NOTHING;
9556                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9557         }
9558
9559         /* This check is the only reason for --readonly to exist */
9560         if (readonly && repair) {
9561                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9562                 exit(1);
9563         }
9564
9565         radix_tree_init();
9566         cache_tree_init(&root_cache);
9567
9568         if((ret = check_mounted(argv[optind])) < 0) {
9569                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9570                 goto err_out;
9571         } else if(ret) {
9572                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9573                 ret = -EBUSY;
9574                 goto err_out;
9575         }
9576
9577         /* only allow partial opening under repair mode */
9578         if (repair)
9579                 ctree_flags |= OPEN_CTREE_PARTIAL;
9580
9581         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9582                                   ctree_flags);
9583         if (!info) {
9584                 fprintf(stderr, "Couldn't open file system\n");
9585                 ret = -EIO;
9586                 goto err_out;
9587         }
9588
9589         global_info = info;
9590         root = info->fs_root;
9591
9592         /*
9593          * repair mode will force us to commit transaction which
9594          * will make us fail to load log tree when mounting.
9595          */
9596         if (repair && btrfs_super_log_root(info->super_copy)) {
9597                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9598                 if (!ret) {
9599                         ret = 1;
9600                         goto close_out;
9601                 }
9602                 ret = zero_log_tree(root);
9603                 if (ret) {
9604                         fprintf(stderr, "fail to zero log tree\n");
9605                         goto close_out;
9606                 }
9607         }
9608
9609         uuid_unparse(info->super_copy->fsid, uuidbuf);
9610         if (qgroup_report) {
9611                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9612                        uuidbuf);
9613                 ret = qgroup_verify_all(info);
9614                 if (ret == 0)
9615                         print_qgroup_report(1);
9616                 goto close_out;
9617         }
9618         if (subvolid) {
9619                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9620                        subvolid, argv[optind], uuidbuf);
9621                 ret = print_extent_state(info, subvolid);
9622                 goto close_out;
9623         }
9624         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9625
9626         if (!extent_buffer_uptodate(info->tree_root->node) ||
9627             !extent_buffer_uptodate(info->dev_root->node) ||
9628             !extent_buffer_uptodate(info->chunk_root->node)) {
9629                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9630                 ret = -EIO;
9631                 goto close_out;
9632         }
9633
9634         if (init_extent_tree || init_csum_tree) {
9635                 struct btrfs_trans_handle *trans;
9636
9637                 trans = btrfs_start_transaction(info->extent_root, 0);
9638                 if (IS_ERR(trans)) {
9639                         fprintf(stderr, "Error starting transaction\n");
9640                         ret = PTR_ERR(trans);
9641                         goto close_out;
9642                 }
9643
9644                 if (init_extent_tree) {
9645                         printf("Creating a new extent tree\n");
9646                         ret = reinit_extent_tree(trans, info);
9647                         if (ret)
9648                                 goto close_out;
9649                 }
9650
9651                 if (init_csum_tree) {
9652                         fprintf(stderr, "Reinit crc root\n");
9653                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9654                         if (ret) {
9655                                 fprintf(stderr, "crc root initialization failed\n");
9656                                 ret = -EIO;
9657                                 goto close_out;
9658                         }
9659
9660                         ret = fill_csum_tree(trans, info->csum_root,
9661                                              init_extent_tree);
9662                         if (ret) {
9663                                 fprintf(stderr, "crc refilling failed\n");
9664                                 return -EIO;
9665                         }
9666                 }
9667                 /*
9668                  * Ok now we commit and run the normal fsck, which will add
9669                  * extent entries for all of the items it finds.
9670                  */
9671                 ret = btrfs_commit_transaction(trans, info->extent_root);
9672                 if (ret)
9673                         goto close_out;
9674         }
9675         if (!extent_buffer_uptodate(info->extent_root->node)) {
9676                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9677                 ret = -EIO;
9678                 goto close_out;
9679         }
9680         if (!extent_buffer_uptodate(info->csum_root->node)) {
9681                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9682                 ret = -EIO;
9683                 goto close_out;
9684         }
9685
9686         if (!ctx.progress_enabled)
9687                 fprintf(stderr, "checking extents\n");
9688         ret = check_chunks_and_extents(root);
9689         if (ret)
9690                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9691
9692         ret = repair_root_items(info);
9693         if (ret < 0)
9694                 goto close_out;
9695         if (repair) {
9696                 fprintf(stderr, "Fixed %d roots.\n", ret);
9697                 ret = 0;
9698         } else if (ret > 0) {
9699                 fprintf(stderr,
9700                        "Found %d roots with an outdated root item.\n",
9701                        ret);
9702                 fprintf(stderr,
9703                         "Please run a filesystem check with the option --repair to fix them.\n");
9704                 ret = 1;
9705                 goto close_out;
9706         }
9707
9708         if (!ctx.progress_enabled)
9709                 fprintf(stderr, "checking free space cache\n");
9710         ret = check_space_cache(root);
9711         if (ret)
9712                 goto out;
9713
9714         /*
9715          * We used to have to have these hole extents in between our real
9716          * extents so if we don't have this flag set we need to make sure there
9717          * are no gaps in the file extents for inodes, otherwise we can just
9718          * ignore it when this happens.
9719          */
9720         no_holes = btrfs_fs_incompat(root->fs_info,
9721                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9722         if (!ctx.progress_enabled)
9723                 fprintf(stderr, "checking fs roots\n");
9724         ret = check_fs_roots(root, &root_cache);
9725         if (ret)
9726                 goto out;
9727
9728         fprintf(stderr, "checking csums\n");
9729         ret = check_csums(root);
9730         if (ret)
9731                 goto out;
9732
9733         fprintf(stderr, "checking root refs\n");
9734         ret = check_root_refs(root, &root_cache);
9735         if (ret)
9736                 goto out;
9737
9738         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9739                 struct extent_buffer *eb;
9740
9741                 eb = list_first_entry(&root->fs_info->recow_ebs,
9742                                       struct extent_buffer, recow);
9743                 list_del_init(&eb->recow);
9744                 ret = recow_extent_buffer(root, eb);
9745                 if (ret)
9746                         break;
9747         }
9748
9749         while (!list_empty(&delete_items)) {
9750                 struct bad_item *bad;
9751
9752                 bad = list_first_entry(&delete_items, struct bad_item, list);
9753                 list_del_init(&bad->list);
9754                 if (repair)
9755                         ret = delete_bad_item(root, bad);
9756                 free(bad);
9757         }
9758
9759         if (info->quota_enabled) {
9760                 int err;
9761                 fprintf(stderr, "checking quota groups\n");
9762                 err = qgroup_verify_all(info);
9763                 if (err)
9764                         goto out;
9765         }
9766
9767         if (!list_empty(&root->fs_info->recow_ebs)) {
9768                 fprintf(stderr, "Transid errors in file system\n");
9769                 ret = 1;
9770         }
9771 out:
9772         print_qgroup_report(0);
9773         if (found_old_backref) { /*
9774                  * there was a disk format change when mixed
9775                  * backref was in testing tree. The old format
9776                  * existed about one week.
9777                  */
9778                 printf("\n * Found old mixed backref format. "
9779                        "The old format is not supported! *"
9780                        "\n * Please mount the FS in readonly mode, "
9781                        "backup data and re-format the FS. *\n\n");
9782                 ret = 1;
9783         }
9784         printf("found %llu bytes used err is %d\n",
9785                (unsigned long long)bytes_used, ret);
9786         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9787         printf("total tree bytes: %llu\n",
9788                (unsigned long long)total_btree_bytes);
9789         printf("total fs tree bytes: %llu\n",
9790                (unsigned long long)total_fs_tree_bytes);
9791         printf("total extent tree bytes: %llu\n",
9792                (unsigned long long)total_extent_tree_bytes);
9793         printf("btree space waste bytes: %llu\n",
9794                (unsigned long long)btree_space_waste);
9795         printf("file data blocks allocated: %llu\n referenced %llu\n",
9796                 (unsigned long long)data_bytes_allocated,
9797                 (unsigned long long)data_bytes_referenced);
9798
9799         free_root_recs_tree(&root_cache);
9800 close_out:
9801         close_ctree(root);
9802 err_out:
9803         if (ctx.progress_enabled)
9804                 task_deinit(ctx.info);
9805
9806         return ret;
9807 }