btrfs-progs: fsck: Add repair function for I_ERR_FILE_WRONG_NBYTES
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "transaction.h"
34 #include "utils.h"
35 #include "commands.h"
36 #include "free-space-cache.h"
37 #include "btrfsck.h"
38 #include "qgroup-verify.h"
39 #include "rbtree-utils.h"
40 #include "backref.h"
41 #include "ulist.h"
42
/*
 * File-scope checker state; every counter and flag starts at zero.
 * NOTE(review): apart from no_holes, none of these is read or written in
 * the part of the file reviewed here, so their roles are inferred from the
 * names only -- confirm against the code that updates them.
 */
43 static u64 bytes_used = 0;
44 static u64 total_csum_bytes = 0;
45 static u64 total_btree_bytes = 0;
46 static u64 total_fs_tree_bytes = 0;
47 static u64 total_extent_tree_bytes = 0;
48 static u64 btree_space_waste = 0;
49 static u64 data_bytes_allocated = 0;
50 static u64 data_bytes_referenced = 0;
51 static int found_old_backref = 0;
52 static LIST_HEAD(duplicate_extents);
53 static LIST_HEAD(delete_items);
54 static int repair = 0;
/* When non-zero, maybe_free_inode_rec() skips the file-extent hole check. */
55 static int no_holes = 0;
56 static int init_extent_tree = 0;
57 static int check_data_csum = 0;
58
/*
 * Header shared by both backref flavours: it is embedded as the `node`
 * member of struct data_backref and struct tree_backref.
 * NOTE(review): the one-bit flags are set and tested outside the reviewed
 * part of the file; confirm their exact meaning there.
 */
59 struct extent_backref {
60         struct list_head list;
61         unsigned int is_data:1;
62         unsigned int found_extent_tree:1;
63         unsigned int full_backref:1;
64         unsigned int found_ref:1;
65         unsigned int broken:1;
66 };
67
/*
 * Backref record for a data extent; starts with the common
 * struct extent_backref header.
 */
68 struct data_backref {
69         struct extent_backref node;
        /* parent and root share storage: only one of them is valid at a time */
70         union {
71                 u64 parent;
72                 u64 root;
73         };
74         u64 owner;
75         u64 offset;
76         u64 disk_bytenr;
77         u64 bytes;
78         u64 ram_bytes;
79         u32 num_refs;
        /* a counter, distinct from the one-bit node.found_ref flag */
80         u32 found_ref;
81 };
82
83 /*
 * Much like data_backref, but with the undetermined members removed and
 * linked through a plain list_head.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to inode_rec->orphan_extents.
 */
89 struct orphan_data_extent {
90         struct list_head list;
91         u64 root;
92         u64 objectid;
93         u64 offset;
94         u64 disk_bytenr;
95         u64 disk_len;
96 };
97
/*
 * Backref record that carries only the common struct extent_backref header
 * plus one 64-bit id.
 */
98 struct tree_backref {
99         struct extent_backref node;
        /* parent and root share storage: only one of them is valid at a time */
100         union {
101                 u64 parent;
102                 u64 root;
103         };
104 };
105
/*
 * Per-extent bookkeeping record.  It can sit on several lists at once
 * (backrefs, dups, list) and in a cache tree through `cache`.
 * NOTE(review): none of these fields is touched in the reviewed part of the
 * file; confirm field semantics against the extent-tree checking code.
 */
106 struct extent_record {
107         struct list_head backrefs;
108         struct list_head dups;
109         struct list_head list;
110         struct cache_extent cache;
111         struct btrfs_disk_key parent_key;
112         u64 start;
113         u64 max_size;
114         u64 nr;
115         u64 refs;
116         u64 extent_item_refs;
117         u64 generation;
118         u64 parent_generation;
119         u64 info_objectid;
120         u32 num_duplicates;
121         u8 info_level;
122         int flag_block_full_backref;
123         unsigned int found_rec:1;
124         unsigned int content_checked:1;
125         unsigned int owner_ref_checked:1;
126         unsigned int is_root:1;
127         unsigned int metadata:1;
128         unsigned int bad_full_backref:1;
129 };
130
/*
 * One (directory, name) reference to an inode, linked on
 * inode_record.backrefs.  get_inode_backref() allocates it with
 * namelen + 1 trailing bytes and NUL-terminates the name.
 */
131 struct inode_backref {
132         struct list_head list;
133         unsigned int found_dir_item:1; /* set when a DIR_ITEM was seen */
134         unsigned int found_dir_index:1; /* set when a DIR_INDEX was seen */
135         unsigned int found_inode_ref:1; /* tested by add_inode_backref(); presumably set for INODE_REF/EXTREF -- confirm */
136         unsigned int filetype:8; /* file type taken from the dir item/index */
137         int errors; /* REF_ERR_* bits */
138         unsigned int ref_type; /* item type of the inode ref/extref */
139         u64 dir;
140         u64 index;
141         u16 namelen; /* length of name, excluding the trailing NUL */
142         char name[0]; /* trailing storage for the name */
143 };
144
/*
 * List-linked summary of a root item.
 * NOTE(review): not used in the reviewed part of the file; the field names
 * suggest they mirror the on-disk root item -- confirm where it is filled.
 */
145 struct root_item_record {
146         struct list_head list;
147         u64 objectid;
148         u64 bytenr;
149         u64 last_snapshot;
150         u8 level;
151         u8 drop_level;
152         int level_size;
153         struct btrfs_key drop_key;
154 };
155
/*
 * Error bits accumulated in inode_backref.errors and decoded into text by
 * print_ref_error().
 */
156 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
157 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
158 #define REF_ERR_NO_INODE_REF            (1 << 2)
159 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
160 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
161 #define REF_ERR_DUP_INODE_REF           (1 << 5)
162 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
163 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
164 #define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
165 #define REF_ERR_NO_ROOT_REF             (1 << 9)
166 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
167 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
168 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
169
/*
 * One hole in a file's extent coverage, covering [start, start + len).
 * Holes are kept in an rb-tree (inode_record.holes).
 */
170 struct file_extent_hole {
171         struct rb_node node;
172         u64 start;
173         u64 len;
174 };
175
176 /* Compatible function to allow reuse of old codes */
177 static u64 first_extent_gap(struct rb_root *holes)
178 {
179         struct file_extent_hole *hole;
180
181         if (RB_EMPTY_ROOT(holes))
182                 return (u64)-1;
183
184         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
185         return hole->start;
186 }
187
188 int compare_hole(struct rb_node *node1, struct rb_node *node2)
189 {
190         struct file_extent_hole *hole1;
191         struct file_extent_hole *hole2;
192
193         hole1 = rb_entry(node1, struct file_extent_hole, node);
194         hole2 = rb_entry(node2, struct file_extent_hole, node);
195
196         if (hole1->start > hole2->start)
197                 return -1;
198         if (hole1->start < hole2->start)
199                 return 1;
200         /* Now hole1->start == hole2->start */
201         if (hole1->len >= hole2->len)
202                 /*
203                  * Hole 1 will be merge center
204                  * Same hole will be merged later
205                  */
206                 return -1;
207         /* Hole 2 will be merge center */
208         return 1;
209 }
210
211 /*
212  * Add a hole to the record
213  *
214  * This will do hole merge for copy_file_extent_holes(),
215  * which will ensure there won't be continuous holes.
216  */
217 static int add_file_extent_hole(struct rb_root *holes,
218                                 u64 start, u64 len)
219 {
220         struct file_extent_hole *hole;
221         struct file_extent_hole *prev = NULL;
222         struct file_extent_hole *next = NULL;
223
224         hole = malloc(sizeof(*hole));
225         if (!hole)
226                 return -ENOMEM;
227         hole->start = start;
228         hole->len = len;
229         /* Since compare will not return 0, no -EEXIST will happen */
230         rb_insert(holes, &hole->node, compare_hole);
231
232         /* simple merge with previous hole */
233         if (rb_prev(&hole->node))
234                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
235                                 node);
236         if (prev && prev->start + prev->len >= hole->start) {
237                 hole->len = hole->start + hole->len - prev->start;
238                 hole->start = prev->start;
239                 rb_erase(&prev->node, holes);
240                 free(prev);
241                 prev = NULL;
242         }
243
244         /* iterate merge with next holes */
245         while (1) {
246                 if (!rb_next(&hole->node))
247                         break;
248                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
249                                         node);
250                 if (hole->start + hole->len >= next->start) {
251                         if (hole->start + hole->len <= next->start + next->len)
252                                 hole->len = next->start + next->len -
253                                             hole->start;
254                         rb_erase(&next->node, holes);
255                         free(next);
256                         next = NULL;
257                 } else
258                         break;
259         }
260         return 0;
261 }
262
263 static int compare_hole_range(struct rb_node *node, void *data)
264 {
265         struct file_extent_hole *hole;
266         u64 start;
267
268         hole = (struct file_extent_hole *)data;
269         start = hole->start;
270
271         hole = rb_entry(node, struct file_extent_hole, node);
272         if (start < hole->start)
273                 return -1;
274         if (start >= hole->start && start < hole->start + hole->len)
275                 return 0;
276         return 1;
277 }
278
279 /*
280  * Delete a hole in the record
281  *
282  * This will do the hole split and is much restrict than add.
283  */
284 static int del_file_extent_hole(struct rb_root *holes,
285                                 u64 start, u64 len)
286 {
287         struct file_extent_hole *hole;
288         struct file_extent_hole tmp;
289         u64 prev_start = 0;
290         u64 prev_len = 0;
291         u64 next_start = 0;
292         u64 next_len = 0;
293         struct rb_node *node;
294         int have_prev = 0;
295         int have_next = 0;
296         int ret = 0;
297
298         tmp.start = start;
299         tmp.len = len;
300         node = rb_search(holes, &tmp, compare_hole_range, NULL);
301         if (!node)
302                 return -EEXIST;
303         hole = rb_entry(node, struct file_extent_hole, node);
304         if (start + len > hole->start + hole->len)
305                 return -EEXIST;
306
307         /*
308          * Now there will be no overflap, delete the hole and re-add the
309          * split(s) if they exists.
310          */
311         if (start > hole->start) {
312                 prev_start = hole->start;
313                 prev_len = start - hole->start;
314                 have_prev = 1;
315         }
316         if (hole->start + hole->len > start + len) {
317                 next_start = start + len;
318                 next_len = hole->start + hole->len - start - len;
319                 have_next = 1;
320         }
321         rb_erase(node, holes);
322         free(hole);
323         if (have_prev) {
324                 ret = add_file_extent_hole(holes, prev_start, prev_len);
325                 if (ret < 0)
326                         return ret;
327         }
328         if (have_next) {
329                 ret = add_file_extent_hole(holes, next_start, next_len);
330                 if (ret < 0)
331                         return ret;
332         }
333         return 0;
334 }
335
336 static int copy_file_extent_holes(struct rb_root *dst,
337                                   struct rb_root *src)
338 {
339         struct file_extent_hole *hole;
340         struct rb_node *node;
341         int ret = 0;
342
343         node = rb_first(src);
344         while (node) {
345                 hole = rb_entry(node, struct file_extent_hole, node);
346                 ret = add_file_extent_hole(dst, hole->start, hole->len);
347                 if (ret)
348                         break;
349                 node = rb_next(node);
350         }
351         return ret;
352 }
353
354 static void free_file_extent_holes(struct rb_root *holes)
355 {
356         struct rb_node *node;
357         struct file_extent_hole *hole;
358
359         node = rb_first(holes);
360         while (node) {
361                 hole = rb_entry(node, struct file_extent_hole, node);
362                 rb_erase(node, holes);
363                 free(hole);
364                 node = rb_first(holes);
365         }
366 }
367
/*
 * Everything the checker has gathered about one inode.  Records are
 * reference counted (refs); get_inode_rec() clones a shared record before
 * handing it out for modification.
 */
368 struct inode_record {
369         struct list_head backrefs; /* list of struct inode_backref */
370         unsigned int checked:1;
371         unsigned int merging:1;
372         unsigned int found_inode_item:1; /* set by process_inode_item() */
373         unsigned int found_dir_item:1;
374         unsigned int found_file_extent:1;
375         unsigned int found_csum_item:1;
376         unsigned int some_csum_missing:1;
377         unsigned int nodatasum:1; /* inode item has BTRFS_INODE_NODATASUM */
378         int errors; /* I_ERR_* bits */
379
380         u64 ino;
        /* The next four are copied from the inode item by process_inode_item(). */
381         u32 nlink;
382         u32 imode;
383         u64 isize;
384         u64 nbytes;
385
386         u32 found_link; /* compared against nlink in can_free_inode_rec() */
387         u64 found_size; /* compared against isize (dirs) or nbytes (files) */
388         u64 extent_start; /* initialised to (u64)-1 by get_inode_rec() */
389         u64 extent_end;
390         struct rb_root holes; /* tree of struct file_extent_hole */
391         struct list_head orphan_extents; /* list of struct orphan_data_extent */
392
393         u32 refs;
394 };
395
/*
 * Error bits accumulated in inode_record.errors and decoded into text by
 * print_inode_error().
 */
396 #define I_ERR_NO_INODE_ITEM             (1 << 0)
397 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
398 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
399 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
400 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
401 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
402 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
403 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
404 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
405 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
406 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
407 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
408 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
409 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
410 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
411
/*
 * One named reference to a root, with a trailing name buffer.
 * NOTE(review): not used in the reviewed part of the file; the layout
 * parallels struct inode_backref, so `errors` presumably holds REF_ERR_*
 * bits -- confirm.
 */
412 struct root_backref {
413         struct list_head list;
414         unsigned int found_dir_item:1;
415         unsigned int found_dir_index:1;
416         unsigned int found_back_ref:1;
417         unsigned int found_forward_ref:1;
418         unsigned int reachable:1;
419         int errors;
420         u64 ref_root;
421         u64 dir;
422         u64 index;
423         u16 namelen;
424         char name[0];
425 };
426
/*
 * Per-root record that can be stored in a cache tree through `cache`.
 * NOTE(review): not used in the reviewed part of the file; `backrefs`
 * presumably lists struct root_backref entries -- confirm.
 */
427 struct root_record {
428         struct list_head backrefs;
429         struct cache_extent cache;
430         unsigned int found_root_item:1;
431         u64 objectid;
432         u32 found_ref;
433 };
434
/*
 * Cache-tree node that carries an opaque pointer.  get_inode_rec() keys it
 * by inode number (cache.start = ino, cache.size = 1) and stores the
 * struct inode_record in `data`.
 */
435 struct ptr_node {
436         struct cache_extent cache;
437         void *data;
438 };
439
/*
 * Per-node cache state used while walking a tree.
 * process_inode_item() works on `current` and hands `inode_cache` to
 * maybe_free_inode_rec().
 * NOTE(review): root_cache and refs are not used in the reviewed part of
 * the file.
 */
440 struct shared_node {
441         struct cache_extent cache;
442         struct cache_tree root_cache;
443         struct cache_tree inode_cache;
444         struct inode_record *current;
445         u32 refs;
446 };
447
/*
 * A (start, size) pair describing one block.
 * NOTE(review): not used in the reviewed part of the file.
 */
448 struct block_info {
449         u64 start;
450         u32 size;
451 };
452
/*
 * State carried through a tree walk: a cache tree plus one shared_node
 * pointer per tree level.
 * NOTE(review): not used in the reviewed part of the file; active_node is
 * presumably an index into nodes[] -- confirm.
 */
453 struct walk_control {
454         struct cache_tree shared;
455         struct shared_node *nodes[BTRFS_MAX_LEVEL];
456         int active_node;
457         int root_level;
458 };
459
/*
 * List entry identifying an item by its key and the id of its root.
 * NOTE(review): not used in the reviewed part of the file.
 */
460 struct bad_item {
461         struct btrfs_key key;
462         u64 root_id;
463         struct list_head list;
464 };
465
466 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
467
468 static void record_root_in_trans(struct btrfs_trans_handle *trans,
469                                  struct btrfs_root *root)
470 {
471         if (root->last_trans != trans->transid) {
472                 root->track_dirty = 1;
473                 root->last_trans = trans->transid;
474                 root->commit_root = root->node;
475                 extent_buffer_get(root->node);
476         }
477 }
478
479 static u8 imode_to_type(u32 imode)
480 {
481 #define S_SHIFT 12
482         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
483                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
484                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
485                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
486                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
487                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
488                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
489                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
490         };
491
492         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
493 #undef S_SHIFT
494 }
495
496 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
497 {
498         struct device_record *rec1;
499         struct device_record *rec2;
500
501         rec1 = rb_entry(node1, struct device_record, node);
502         rec2 = rb_entry(node2, struct device_record, node);
503         if (rec1->devid > rec2->devid)
504                 return -1;
505         else if (rec1->devid < rec2->devid)
506                 return 1;
507         else
508                 return 0;
509 }
510
511 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
512 {
513         struct inode_record *rec;
514         struct inode_backref *backref;
515         struct inode_backref *orig;
516         struct orphan_data_extent *src_orphan;
517         struct orphan_data_extent *dst_orphan;
518         size_t size;
519         int ret;
520
521         rec = malloc(sizeof(*rec));
522         memcpy(rec, orig_rec, sizeof(*rec));
523         rec->refs = 1;
524         INIT_LIST_HEAD(&rec->backrefs);
525         INIT_LIST_HEAD(&rec->orphan_extents);
526         rec->holes = RB_ROOT;
527
528         list_for_each_entry(orig, &orig_rec->backrefs, list) {
529                 size = sizeof(*orig) + orig->namelen + 1;
530                 backref = malloc(size);
531                 memcpy(backref, orig, size);
532                 list_add_tail(&backref->list, &rec->backrefs);
533         }
534         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
535                 dst_orphan = malloc(sizeof(*dst_orphan));
536                 /* TODO: Fix all the HELL of un-catched -ENOMEM case */
537                 BUG_ON(!dst_orphan);
538                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
539                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
540         }
541         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
542         BUG_ON(ret < 0);
543
544         return rec;
545 }
546
547 static void print_orphan_data_extents(struct list_head *orphan_extents,
548                                       u64 objectid)
549 {
550         struct orphan_data_extent *orphan;
551
552         if (list_empty(orphan_extents))
553                 return;
554         printf("The following data extent is lost in tree %llu:\n",
555                objectid);
556         list_for_each_entry(orphan, orphan_extents, list) {
557                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
558                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
559                        orphan->disk_len);
560         }
561 }
562
563 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
564 {
565         u64 root_objectid = root->root_key.objectid;
566         int errors = rec->errors;
567
568         if (!errors)
569                 return;
570         /* reloc root errors, we print its corresponding fs root objectid*/
571         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
572                 root_objectid = root->root_key.offset;
573                 fprintf(stderr, "reloc");
574         }
575         fprintf(stderr, "root %llu inode %llu errors %x",
576                 (unsigned long long) root_objectid,
577                 (unsigned long long) rec->ino, rec->errors);
578
579         if (errors & I_ERR_NO_INODE_ITEM)
580                 fprintf(stderr, ", no inode item");
581         if (errors & I_ERR_NO_ORPHAN_ITEM)
582                 fprintf(stderr, ", no orphan item");
583         if (errors & I_ERR_DUP_INODE_ITEM)
584                 fprintf(stderr, ", dup inode item");
585         if (errors & I_ERR_DUP_DIR_INDEX)
586                 fprintf(stderr, ", dup dir index");
587         if (errors & I_ERR_ODD_DIR_ITEM)
588                 fprintf(stderr, ", odd dir item");
589         if (errors & I_ERR_ODD_FILE_EXTENT)
590                 fprintf(stderr, ", odd file extent");
591         if (errors & I_ERR_BAD_FILE_EXTENT)
592                 fprintf(stderr, ", bad file extent");
593         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
594                 fprintf(stderr, ", file extent overlap");
595         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
596                 fprintf(stderr, ", file extent discount");
597         if (errors & I_ERR_DIR_ISIZE_WRONG)
598                 fprintf(stderr, ", dir isize wrong");
599         if (errors & I_ERR_FILE_NBYTES_WRONG)
600                 fprintf(stderr, ", nbytes wrong");
601         if (errors & I_ERR_ODD_CSUM_ITEM)
602                 fprintf(stderr, ", odd csum item");
603         if (errors & I_ERR_SOME_CSUM_MISSING)
604                 fprintf(stderr, ", some csum missing");
605         if (errors & I_ERR_LINK_COUNT_WRONG)
606                 fprintf(stderr, ", link count wrong");
607         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
608                 fprintf(stderr, ", orphan file extent");
609         fprintf(stderr, "\n");
610         /* Print the orphan extents if needed */
611         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
612                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
613
614         /* Print the holes if needed */
615         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
616                 struct file_extent_hole *hole;
617                 struct rb_node *node;
618
619                 node = rb_first(&rec->holes);
620                 fprintf(stderr, "Found file extent holes:\n");
621                 while (node) {
622                         hole = rb_entry(node, struct file_extent_hole, node);
623                         fprintf(stderr, "\tstart: %llu, len:%llu\n",
624                                 hole->start, hole->len);
625                         node = rb_next(node);
626                 }
627         }
628 }
629
630 static void print_ref_error(int errors)
631 {
632         if (errors & REF_ERR_NO_DIR_ITEM)
633                 fprintf(stderr, ", no dir item");
634         if (errors & REF_ERR_NO_DIR_INDEX)
635                 fprintf(stderr, ", no dir index");
636         if (errors & REF_ERR_NO_INODE_REF)
637                 fprintf(stderr, ", no inode ref");
638         if (errors & REF_ERR_DUP_DIR_ITEM)
639                 fprintf(stderr, ", dup dir item");
640         if (errors & REF_ERR_DUP_DIR_INDEX)
641                 fprintf(stderr, ", dup dir index");
642         if (errors & REF_ERR_DUP_INODE_REF)
643                 fprintf(stderr, ", dup inode ref");
644         if (errors & REF_ERR_INDEX_UNMATCH)
645                 fprintf(stderr, ", index unmatch");
646         if (errors & REF_ERR_FILETYPE_UNMATCH)
647                 fprintf(stderr, ", filetype unmatch");
648         if (errors & REF_ERR_NAME_TOO_LONG)
649                 fprintf(stderr, ", name too long");
650         if (errors & REF_ERR_NO_ROOT_REF)
651                 fprintf(stderr, ", no root ref");
652         if (errors & REF_ERR_NO_ROOT_BACKREF)
653                 fprintf(stderr, ", no root backref");
654         if (errors & REF_ERR_DUP_ROOT_REF)
655                 fprintf(stderr, ", dup root ref");
656         if (errors & REF_ERR_DUP_ROOT_BACKREF)
657                 fprintf(stderr, ", dup root backref");
658         fprintf(stderr, "\n");
659 }
660
661 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
662                                           u64 ino, int mod)
663 {
664         struct ptr_node *node;
665         struct cache_extent *cache;
666         struct inode_record *rec = NULL;
667         int ret;
668
669         cache = lookup_cache_extent(inode_cache, ino, 1);
670         if (cache) {
671                 node = container_of(cache, struct ptr_node, cache);
672                 rec = node->data;
673                 if (mod && rec->refs > 1) {
674                         node->data = clone_inode_rec(rec);
675                         rec->refs--;
676                         rec = node->data;
677                 }
678         } else if (mod) {
679                 rec = calloc(1, sizeof(*rec));
680                 rec->ino = ino;
681                 rec->extent_start = (u64)-1;
682                 rec->refs = 1;
683                 INIT_LIST_HEAD(&rec->backrefs);
684                 INIT_LIST_HEAD(&rec->orphan_extents);
685                 rec->holes = RB_ROOT;
686
687                 node = malloc(sizeof(*node));
688                 node->cache.start = ino;
689                 node->cache.size = 1;
690                 node->data = rec;
691
692                 if (ino == BTRFS_FREE_INO_OBJECTID)
693                         rec->found_link = 1;
694
695                 ret = insert_cache_extent(inode_cache, &node->cache);
696                 BUG_ON(ret);
697         }
698         return rec;
699 }
700
701 static void free_orphan_data_extents(struct list_head *orphan_extents)
702 {
703         struct orphan_data_extent *orphan;
704
705         while (!list_empty(orphan_extents)) {
706                 orphan = list_entry(orphan_extents->next,
707                                     struct orphan_data_extent, list);
708                 list_del(&orphan->list);
709                 free(orphan);
710         }
711 }
712
713 static void free_inode_rec(struct inode_record *rec)
714 {
715         struct inode_backref *backref;
716
717         if (--rec->refs > 0)
718                 return;
719
720         while (!list_empty(&rec->backrefs)) {
721                 backref = list_entry(rec->backrefs.next,
722                                      struct inode_backref, list);
723                 list_del(&backref->list);
724                 free(backref);
725         }
726         free_orphan_data_extents(&rec->orphan_extents);
727         free_file_extent_holes(&rec->holes);
728         free(rec);
729 }
730
731 static int can_free_inode_rec(struct inode_record *rec)
732 {
733         if (!rec->errors && rec->checked && rec->found_inode_item &&
734             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
735                 return 1;
736         return 0;
737 }
738
739 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
740                                  struct inode_record *rec)
741 {
742         struct cache_extent *cache;
743         struct inode_backref *tmp, *backref;
744         struct ptr_node *node;
745         unsigned char filetype;
746
747         if (!rec->found_inode_item)
748                 return;
749
750         filetype = imode_to_type(rec->imode);
751         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
752                 if (backref->found_dir_item && backref->found_dir_index) {
753                         if (backref->filetype != filetype)
754                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
755                         if (!backref->errors && backref->found_inode_ref) {
756                                 list_del(&backref->list);
757                                 free(backref);
758                         }
759                 }
760         }
761
762         if (!rec->checked || rec->merging)
763                 return;
764
765         if (S_ISDIR(rec->imode)) {
766                 if (rec->found_size != rec->isize)
767                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
768                 if (rec->found_file_extent)
769                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
770         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
771                 if (rec->found_dir_item)
772                         rec->errors |= I_ERR_ODD_DIR_ITEM;
773                 if (rec->found_size != rec->nbytes)
774                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
775                 if (rec->nlink > 0 && !no_holes &&
776                     (rec->extent_end < rec->isize ||
777                      first_extent_gap(&rec->holes) < rec->isize))
778                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
779         }
780
781         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
782                 if (rec->found_csum_item && rec->nodatasum)
783                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
784                 if (rec->some_csum_missing && !rec->nodatasum)
785                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
786         }
787
788         BUG_ON(rec->refs != 1);
789         if (can_free_inode_rec(rec)) {
790                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
791                 node = container_of(cache, struct ptr_node, cache);
792                 BUG_ON(node->data != rec);
793                 remove_cache_extent(inode_cache, &node->cache);
794                 free(node);
795                 free_inode_rec(rec);
796         }
797 }
798
799 static int check_orphan_item(struct btrfs_root *root, u64 ino)
800 {
801         struct btrfs_path path;
802         struct btrfs_key key;
803         int ret;
804
805         key.objectid = BTRFS_ORPHAN_OBJECTID;
806         key.type = BTRFS_ORPHAN_ITEM_KEY;
807         key.offset = ino;
808
809         btrfs_init_path(&path);
810         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
811         btrfs_release_path(&path);
812         if (ret > 0)
813                 ret = -ENOENT;
814         return ret;
815 }
816
817 static int process_inode_item(struct extent_buffer *eb,
818                               int slot, struct btrfs_key *key,
819                               struct shared_node *active_node)
820 {
821         struct inode_record *rec;
822         struct btrfs_inode_item *item;
823
824         rec = active_node->current;
825         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
826         if (rec->found_inode_item) {
827                 rec->errors |= I_ERR_DUP_INODE_ITEM;
828                 return 1;
829         }
830         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
831         rec->nlink = btrfs_inode_nlink(eb, item);
832         rec->isize = btrfs_inode_size(eb, item);
833         rec->nbytes = btrfs_inode_nbytes(eb, item);
834         rec->imode = btrfs_inode_mode(eb, item);
835         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
836                 rec->nodatasum = 1;
837         rec->found_inode_item = 1;
838         if (rec->nlink == 0)
839                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
840         maybe_free_inode_rec(&active_node->inode_cache, rec);
841         return 0;
842 }
843
844 static struct inode_backref *get_inode_backref(struct inode_record *rec,
845                                                 const char *name,
846                                                 int namelen, u64 dir)
847 {
848         struct inode_backref *backref;
849
850         list_for_each_entry(backref, &rec->backrefs, list) {
851                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
852                         break;
853                 if (backref->dir != dir || backref->namelen != namelen)
854                         continue;
855                 if (memcmp(name, backref->name, namelen))
856                         continue;
857                 return backref;
858         }
859
860         backref = malloc(sizeof(*backref) + namelen + 1);
861         memset(backref, 0, sizeof(*backref));
862         backref->dir = dir;
863         backref->namelen = namelen;
864         memcpy(backref->name, name, namelen);
865         backref->name[namelen] = '\0';
866         list_add_tail(&backref->list, &rec->backrefs);
867         return backref;
868 }
869
870 static int add_inode_backref(struct cache_tree *inode_cache,
871                              u64 ino, u64 dir, u64 index,
872                              const char *name, int namelen,
873                              int filetype, int itemtype, int errors)
874 {
875         struct inode_record *rec;
876         struct inode_backref *backref;
877
878         rec = get_inode_rec(inode_cache, ino, 1);
879         backref = get_inode_backref(rec, name, namelen, dir);
880         if (errors)
881                 backref->errors |= errors;
882         if (itemtype == BTRFS_DIR_INDEX_KEY) {
883                 if (backref->found_dir_index)
884                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
885                 if (backref->found_inode_ref && backref->index != index)
886                         backref->errors |= REF_ERR_INDEX_UNMATCH;
887                 if (backref->found_dir_item && backref->filetype != filetype)
888                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
889
890                 backref->index = index;
891                 backref->filetype = filetype;
892                 backref->found_dir_index = 1;
893         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
894                 rec->found_link++;
895                 if (backref->found_dir_item)
896                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
897                 if (backref->found_dir_index && backref->filetype != filetype)
898                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
899
900                 backref->filetype = filetype;
901                 backref->found_dir_item = 1;
902         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
903                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
904                 if (backref->found_inode_ref)
905                         backref->errors |= REF_ERR_DUP_INODE_REF;
906                 if (backref->found_dir_index && backref->index != index)
907                         backref->errors |= REF_ERR_INDEX_UNMATCH;
908                 else
909                         backref->index = index;
910
911                 backref->ref_type = itemtype;
912                 backref->found_inode_ref = 1;
913         } else {
914                 BUG_ON(1);
915         }
916
917         maybe_free_inode_rec(inode_cache, rec);
918         return 0;
919 }
920
921 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
922                             struct cache_tree *dst_cache)
923 {
924         struct inode_backref *backref;
925         u32 dir_count = 0;
926         int ret = 0;
927
928         dst->merging = 1;
929         list_for_each_entry(backref, &src->backrefs, list) {
930                 if (backref->found_dir_index) {
931                         add_inode_backref(dst_cache, dst->ino, backref->dir,
932                                         backref->index, backref->name,
933                                         backref->namelen, backref->filetype,
934                                         BTRFS_DIR_INDEX_KEY, backref->errors);
935                 }
936                 if (backref->found_dir_item) {
937                         dir_count++;
938                         add_inode_backref(dst_cache, dst->ino,
939                                         backref->dir, 0, backref->name,
940                                         backref->namelen, backref->filetype,
941                                         BTRFS_DIR_ITEM_KEY, backref->errors);
942                 }
943                 if (backref->found_inode_ref) {
944                         add_inode_backref(dst_cache, dst->ino,
945                                         backref->dir, backref->index,
946                                         backref->name, backref->namelen, 0,
947                                         backref->ref_type, backref->errors);
948                 }
949         }
950
951         if (src->found_dir_item)
952                 dst->found_dir_item = 1;
953         if (src->found_file_extent)
954                 dst->found_file_extent = 1;
955         if (src->found_csum_item)
956                 dst->found_csum_item = 1;
957         if (src->some_csum_missing)
958                 dst->some_csum_missing = 1;
959         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
960                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
961                 if (ret < 0)
962                         return ret;
963         }
964
965         BUG_ON(src->found_link < dir_count);
966         dst->found_link += src->found_link - dir_count;
967         dst->found_size += src->found_size;
968         if (src->extent_start != (u64)-1) {
969                 if (dst->extent_start == (u64)-1) {
970                         dst->extent_start = src->extent_start;
971                         dst->extent_end = src->extent_end;
972                 } else {
973                         if (dst->extent_end > src->extent_start)
974                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
975                         else if (dst->extent_end < src->extent_start) {
976                                 ret = add_file_extent_hole(&dst->holes,
977                                         dst->extent_end,
978                                         src->extent_start - dst->extent_end);
979                         }
980                         if (dst->extent_end < src->extent_end)
981                                 dst->extent_end = src->extent_end;
982                 }
983         }
984
985         dst->errors |= src->errors;
986         if (src->found_inode_item) {
987                 if (!dst->found_inode_item) {
988                         dst->nlink = src->nlink;
989                         dst->isize = src->isize;
990                         dst->nbytes = src->nbytes;
991                         dst->imode = src->imode;
992                         dst->nodatasum = src->nodatasum;
993                         dst->found_inode_item = 1;
994                 } else {
995                         dst->errors |= I_ERR_DUP_INODE_ITEM;
996                 }
997         }
998         dst->merging = 0;
999
1000         return 0;
1001 }
1002
1003 static int splice_shared_node(struct shared_node *src_node,
1004                               struct shared_node *dst_node)
1005 {
1006         struct cache_extent *cache;
1007         struct ptr_node *node, *ins;
1008         struct cache_tree *src, *dst;
1009         struct inode_record *rec, *conflict;
1010         u64 current_ino = 0;
1011         int splice = 0;
1012         int ret;
1013
1014         if (--src_node->refs == 0)
1015                 splice = 1;
1016         if (src_node->current)
1017                 current_ino = src_node->current->ino;
1018
1019         src = &src_node->root_cache;
1020         dst = &dst_node->root_cache;
1021 again:
1022         cache = search_cache_extent(src, 0);
1023         while (cache) {
1024                 node = container_of(cache, struct ptr_node, cache);
1025                 rec = node->data;
1026                 cache = next_cache_extent(cache);
1027
1028                 if (splice) {
1029                         remove_cache_extent(src, &node->cache);
1030                         ins = node;
1031                 } else {
1032                         ins = malloc(sizeof(*ins));
1033                         ins->cache.start = node->cache.start;
1034                         ins->cache.size = node->cache.size;
1035                         ins->data = rec;
1036                         rec->refs++;
1037                 }
1038                 ret = insert_cache_extent(dst, &ins->cache);
1039                 if (ret == -EEXIST) {
1040                         conflict = get_inode_rec(dst, rec->ino, 1);
1041                         merge_inode_recs(rec, conflict, dst);
1042                         if (rec->checked) {
1043                                 conflict->checked = 1;
1044                                 if (dst_node->current == conflict)
1045                                         dst_node->current = NULL;
1046                         }
1047                         maybe_free_inode_rec(dst, conflict);
1048                         free_inode_rec(rec);
1049                         free(ins);
1050                 } else {
1051                         BUG_ON(ret);
1052                 }
1053         }
1054
1055         if (src == &src_node->root_cache) {
1056                 src = &src_node->inode_cache;
1057                 dst = &dst_node->inode_cache;
1058                 goto again;
1059         }
1060
1061         if (current_ino > 0 && (!dst_node->current ||
1062             current_ino > dst_node->current->ino)) {
1063                 if (dst_node->current) {
1064                         dst_node->current->checked = 1;
1065                         maybe_free_inode_rec(dst, dst_node->current);
1066                 }
1067                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1068         }
1069         return 0;
1070 }
1071
1072 static void free_inode_ptr(struct cache_extent *cache)
1073 {
1074         struct ptr_node *node;
1075         struct inode_record *rec;
1076
1077         node = container_of(cache, struct ptr_node, cache);
1078         rec = node->data;
1079         free_inode_rec(rec);
1080         free(node);
1081 }
1082
1083 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1084
1085 static struct shared_node *find_shared_node(struct cache_tree *shared,
1086                                             u64 bytenr)
1087 {
1088         struct cache_extent *cache;
1089         struct shared_node *node;
1090
1091         cache = lookup_cache_extent(shared, bytenr, 1);
1092         if (cache) {
1093                 node = container_of(cache, struct shared_node, cache);
1094                 return node;
1095         }
1096         return NULL;
1097 }
1098
1099 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1100 {
1101         int ret;
1102         struct shared_node *node;
1103
1104         node = calloc(1, sizeof(*node));
1105         node->cache.start = bytenr;
1106         node->cache.size = 1;
1107         cache_tree_init(&node->root_cache);
1108         cache_tree_init(&node->inode_cache);
1109         node->refs = refs;
1110
1111         ret = insert_cache_extent(shared, &node->cache);
1112         BUG_ON(ret);
1113         return 0;
1114 }
1115
1116 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1117                              struct walk_control *wc, int level)
1118 {
1119         struct shared_node *node;
1120         struct shared_node *dest;
1121
1122         if (level == wc->active_node)
1123                 return 0;
1124
1125         BUG_ON(wc->active_node <= level);
1126         node = find_shared_node(&wc->shared, bytenr);
1127         if (!node) {
1128                 add_shared_node(&wc->shared, bytenr, refs);
1129                 node = find_shared_node(&wc->shared, bytenr);
1130                 wc->nodes[level] = node;
1131                 wc->active_node = level;
1132                 return 0;
1133         }
1134
1135         if (wc->root_level == wc->active_node &&
1136             btrfs_root_refs(&root->root_item) == 0) {
1137                 if (--node->refs == 0) {
1138                         free_inode_recs_tree(&node->root_cache);
1139                         free_inode_recs_tree(&node->inode_cache);
1140                         remove_cache_extent(&wc->shared, &node->cache);
1141                         free(node);
1142                 }
1143                 return 1;
1144         }
1145
1146         dest = wc->nodes[wc->active_node];
1147         splice_shared_node(node, dest);
1148         if (node->refs == 0) {
1149                 remove_cache_extent(&wc->shared, &node->cache);
1150                 free(node);
1151         }
1152         return 1;
1153 }
1154
1155 static int leave_shared_node(struct btrfs_root *root,
1156                              struct walk_control *wc, int level)
1157 {
1158         struct shared_node *node;
1159         struct shared_node *dest;
1160         int i;
1161
1162         if (level == wc->root_level)
1163                 return 0;
1164
1165         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1166                 if (wc->nodes[i])
1167                         break;
1168         }
1169         BUG_ON(i >= BTRFS_MAX_LEVEL);
1170
1171         node = wc->nodes[wc->active_node];
1172         wc->nodes[wc->active_node] = NULL;
1173         wc->active_node = i;
1174
1175         dest = wc->nodes[wc->active_node];
1176         if (wc->active_node < wc->root_level ||
1177             btrfs_root_refs(&root->root_item) > 0) {
1178                 BUG_ON(node->refs <= 1);
1179                 splice_shared_node(node, dest);
1180         } else {
1181                 BUG_ON(node->refs < 2);
1182                 node->refs--;
1183         }
1184         return 0;
1185 }
1186
1187 /*
1188  * Returns:
1189  * < 0 - on error
1190  * 1   - if the root with id child_root_id is a child of root parent_root_id
1191  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1192  *       has other root(s) as parent(s)
1193  * 2   - if the root child_root_id doesn't have any parent roots
1194  */
1195 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1196                          u64 child_root_id)
1197 {
1198         struct btrfs_path path;
1199         struct btrfs_key key;
1200         struct extent_buffer *leaf;
1201         int has_parent = 0;
1202         int ret;
1203
1204         btrfs_init_path(&path);
1205
1206         key.objectid = parent_root_id;
1207         key.type = BTRFS_ROOT_REF_KEY;
1208         key.offset = child_root_id;
1209         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1210                                 0, 0);
1211         if (ret < 0)
1212                 return ret;
1213         btrfs_release_path(&path);
1214         if (!ret)
1215                 return 1;
1216
1217         key.objectid = child_root_id;
1218         key.type = BTRFS_ROOT_BACKREF_KEY;
1219         key.offset = 0;
1220         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1221                                 0, 0);
1222         if (ret < 0)
1223                 goto out;
1224
1225         while (1) {
1226                 leaf = path.nodes[0];
1227                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1228                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1229                         if (ret)
1230                                 break;
1231                         leaf = path.nodes[0];
1232                 }
1233
1234                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1235                 if (key.objectid != child_root_id ||
1236                     key.type != BTRFS_ROOT_BACKREF_KEY)
1237                         break;
1238
1239                 has_parent = 1;
1240
1241                 if (key.offset == parent_root_id) {
1242                         btrfs_release_path(&path);
1243                         return 1;
1244                 }
1245
1246                 path.slots[0]++;
1247         }
1248 out:
1249         btrfs_release_path(&path);
1250         if (ret < 0)
1251                 return ret;
1252         return has_parent ? 0 : 2;
1253 }
1254
1255 static int process_dir_item(struct btrfs_root *root,
1256                             struct extent_buffer *eb,
1257                             int slot, struct btrfs_key *key,
1258                             struct shared_node *active_node)
1259 {
1260         u32 total;
1261         u32 cur = 0;
1262         u32 len;
1263         u32 name_len;
1264         u32 data_len;
1265         int error;
1266         int nritems = 0;
1267         int filetype;
1268         struct btrfs_dir_item *di;
1269         struct inode_record *rec;
1270         struct cache_tree *root_cache;
1271         struct cache_tree *inode_cache;
1272         struct btrfs_key location;
1273         char namebuf[BTRFS_NAME_LEN];
1274
1275         root_cache = &active_node->root_cache;
1276         inode_cache = &active_node->inode_cache;
1277         rec = active_node->current;
1278         rec->found_dir_item = 1;
1279
1280         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1281         total = btrfs_item_size_nr(eb, slot);
1282         while (cur < total) {
1283                 nritems++;
1284                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1285                 name_len = btrfs_dir_name_len(eb, di);
1286                 data_len = btrfs_dir_data_len(eb, di);
1287                 filetype = btrfs_dir_type(eb, di);
1288
1289                 rec->found_size += name_len;
1290                 if (name_len <= BTRFS_NAME_LEN) {
1291                         len = name_len;
1292                         error = 0;
1293                 } else {
1294                         len = BTRFS_NAME_LEN;
1295                         error = REF_ERR_NAME_TOO_LONG;
1296                 }
1297                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1298
1299                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1300                         add_inode_backref(inode_cache, location.objectid,
1301                                           key->objectid, key->offset, namebuf,
1302                                           len, filetype, key->type, error);
1303                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1304                         add_inode_backref(root_cache, location.objectid,
1305                                           key->objectid, key->offset,
1306                                           namebuf, len, filetype,
1307                                           key->type, error);
1308                 } else {
1309                         fprintf(stderr, "invalid location in dir item %u\n",
1310                                 location.type);
1311                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1312                                           key->objectid, key->offset, namebuf,
1313                                           len, filetype, key->type, error);
1314                 }
1315
1316                 len = sizeof(*di) + name_len + data_len;
1317                 di = (struct btrfs_dir_item *)((char *)di + len);
1318                 cur += len;
1319         }
1320         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1321                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1322
1323         return 0;
1324 }
1325
1326 static int process_inode_ref(struct extent_buffer *eb,
1327                              int slot, struct btrfs_key *key,
1328                              struct shared_node *active_node)
1329 {
1330         u32 total;
1331         u32 cur = 0;
1332         u32 len;
1333         u32 name_len;
1334         u64 index;
1335         int error;
1336         struct cache_tree *inode_cache;
1337         struct btrfs_inode_ref *ref;
1338         char namebuf[BTRFS_NAME_LEN];
1339
1340         inode_cache = &active_node->inode_cache;
1341
1342         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1343         total = btrfs_item_size_nr(eb, slot);
1344         while (cur < total) {
1345                 name_len = btrfs_inode_ref_name_len(eb, ref);
1346                 index = btrfs_inode_ref_index(eb, ref);
1347                 if (name_len <= BTRFS_NAME_LEN) {
1348                         len = name_len;
1349                         error = 0;
1350                 } else {
1351                         len = BTRFS_NAME_LEN;
1352                         error = REF_ERR_NAME_TOO_LONG;
1353                 }
1354                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1355                 add_inode_backref(inode_cache, key->objectid, key->offset,
1356                                   index, namebuf, len, 0, key->type, error);
1357
1358                 len = sizeof(*ref) + name_len;
1359                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1360                 cur += len;
1361         }
1362         return 0;
1363 }
1364
1365 static int process_inode_extref(struct extent_buffer *eb,
1366                                 int slot, struct btrfs_key *key,
1367                                 struct shared_node *active_node)
1368 {
1369         u32 total;
1370         u32 cur = 0;
1371         u32 len;
1372         u32 name_len;
1373         u64 index;
1374         u64 parent;
1375         int error;
1376         struct cache_tree *inode_cache;
1377         struct btrfs_inode_extref *extref;
1378         char namebuf[BTRFS_NAME_LEN];
1379
1380         inode_cache = &active_node->inode_cache;
1381
1382         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1383         total = btrfs_item_size_nr(eb, slot);
1384         while (cur < total) {
1385                 name_len = btrfs_inode_extref_name_len(eb, extref);
1386                 index = btrfs_inode_extref_index(eb, extref);
1387                 parent = btrfs_inode_extref_parent(eb, extref);
1388                 if (name_len <= BTRFS_NAME_LEN) {
1389                         len = name_len;
1390                         error = 0;
1391                 } else {
1392                         len = BTRFS_NAME_LEN;
1393                         error = REF_ERR_NAME_TOO_LONG;
1394                 }
1395                 read_extent_buffer(eb, namebuf,
1396                                    (unsigned long)(extref + 1), len);
1397                 add_inode_backref(inode_cache, key->objectid, parent,
1398                                   index, namebuf, len, 0, key->type, error);
1399
1400                 len = sizeof(*extref) + name_len;
1401                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1402                 cur += len;
1403         }
1404         return 0;
1405
1406 }
1407
1408 static int count_csum_range(struct btrfs_root *root, u64 start,
1409                             u64 len, u64 *found)
1410 {
1411         struct btrfs_key key;
1412         struct btrfs_path path;
1413         struct extent_buffer *leaf;
1414         int ret;
1415         size_t size;
1416         *found = 0;
1417         u64 csum_end;
1418         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1419
1420         btrfs_init_path(&path);
1421
1422         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1423         key.offset = start;
1424         key.type = BTRFS_EXTENT_CSUM_KEY;
1425
1426         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1427                                 &key, &path, 0, 0);
1428         if (ret < 0)
1429                 goto out;
1430         if (ret > 0 && path.slots[0] > 0) {
1431                 leaf = path.nodes[0];
1432                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1433                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1434                     key.type == BTRFS_EXTENT_CSUM_KEY)
1435                         path.slots[0]--;
1436         }
1437
1438         while (len > 0) {
1439                 leaf = path.nodes[0];
1440                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1441                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1442                         if (ret > 0)
1443                                 break;
1444                         else if (ret < 0)
1445                                 goto out;
1446                         leaf = path.nodes[0];
1447                 }
1448
1449                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1450                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1451                     key.type != BTRFS_EXTENT_CSUM_KEY)
1452                         break;
1453
1454                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1455                 if (key.offset >= start + len)
1456                         break;
1457
1458                 if (key.offset > start)
1459                         start = key.offset;
1460
1461                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1462                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1463                 if (csum_end > start) {
1464                         size = min(csum_end - start, len);
1465                         len -= size;
1466                         start += size;
1467                         *found += size;
1468                 }
1469
1470                 path.slots[0]++;
1471         }
1472 out:
1473         btrfs_release_path(&path);
1474         if (ret < 0)
1475                 return ret;
1476         return 0;
1477 }
1478
1479 static int process_file_extent(struct btrfs_root *root,
1480                                 struct extent_buffer *eb,
1481                                 int slot, struct btrfs_key *key,
1482                                 struct shared_node *active_node)
1483 {
1484         struct inode_record *rec;
1485         struct btrfs_file_extent_item *fi;
1486         u64 num_bytes = 0;
1487         u64 disk_bytenr = 0;
1488         u64 extent_offset = 0;
1489         u64 mask = root->sectorsize - 1;
1490         int extent_type;
1491         int ret;
1492
1493         rec = active_node->current;
1494         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1495         rec->found_file_extent = 1;
1496
1497         if (rec->extent_start == (u64)-1) {
1498                 rec->extent_start = key->offset;
1499                 rec->extent_end = key->offset;
1500         }
1501
1502         if (rec->extent_end > key->offset)
1503                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1504         else if (rec->extent_end < key->offset) {
1505                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1506                                            key->offset - rec->extent_end);
1507                 if (ret < 0)
1508                         return ret;
1509         }
1510
1511         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1512         extent_type = btrfs_file_extent_type(eb, fi);
1513
1514         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1515                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1516                 if (num_bytes == 0)
1517                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1518                 rec->found_size += num_bytes;
1519                 num_bytes = (num_bytes + mask) & ~mask;
1520         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1521                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1522                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1523                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1524                 extent_offset = btrfs_file_extent_offset(eb, fi);
1525                 if (num_bytes == 0 || (num_bytes & mask))
1526                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1527                 if (num_bytes + extent_offset >
1528                     btrfs_file_extent_ram_bytes(eb, fi))
1529                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1530                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1531                     (btrfs_file_extent_compression(eb, fi) ||
1532                      btrfs_file_extent_encryption(eb, fi) ||
1533                      btrfs_file_extent_other_encoding(eb, fi)))
1534                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1535                 if (disk_bytenr > 0)
1536                         rec->found_size += num_bytes;
1537         } else {
1538                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1539         }
1540         rec->extent_end = key->offset + num_bytes;
1541
1542         /*
1543          * The data reloc tree will copy full extents into its inode and then
1544          * copy the corresponding csums.  Because the extent it copied could be
1545          * a preallocated extent that hasn't been written to yet there may be no
1546          * csums to copy, ergo we won't have csums for our file extent.  This is
1547          * ok so just don't bother checking csums if the inode belongs to the
1548          * data reloc tree.
1549          */
1550         if (disk_bytenr > 0 &&
1551             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1552                 u64 found;
1553                 if (btrfs_file_extent_compression(eb, fi))
1554                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1555                 else
1556                         disk_bytenr += extent_offset;
1557
1558                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1559                 if (ret < 0)
1560                         return ret;
1561                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1562                         if (found > 0)
1563                                 rec->found_csum_item = 1;
1564                         if (found < num_bytes)
1565                                 rec->some_csum_missing = 1;
1566                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1567                         if (found > 0)
1568                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1569                 }
1570         }
1571         return 0;
1572 }
1573
1574 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1575                             struct walk_control *wc)
1576 {
1577         struct btrfs_key key;
1578         u32 nritems;
1579         int i;
1580         int ret = 0;
1581         struct cache_tree *inode_cache;
1582         struct shared_node *active_node;
1583
1584         if (wc->root_level == wc->active_node &&
1585             btrfs_root_refs(&root->root_item) == 0)
1586                 return 0;
1587
1588         active_node = wc->nodes[wc->active_node];
1589         inode_cache = &active_node->inode_cache;
1590         nritems = btrfs_header_nritems(eb);
1591         for (i = 0; i < nritems; i++) {
1592                 btrfs_item_key_to_cpu(eb, &key, i);
1593
1594                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1595                         continue;
1596                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1597                         continue;
1598
1599                 if (active_node->current == NULL ||
1600                     active_node->current->ino < key.objectid) {
1601                         if (active_node->current) {
1602                                 active_node->current->checked = 1;
1603                                 maybe_free_inode_rec(inode_cache,
1604                                                      active_node->current);
1605                         }
1606                         active_node->current = get_inode_rec(inode_cache,
1607                                                              key.objectid, 1);
1608                 }
1609                 switch (key.type) {
1610                 case BTRFS_DIR_ITEM_KEY:
1611                 case BTRFS_DIR_INDEX_KEY:
1612                         ret = process_dir_item(root, eb, i, &key, active_node);
1613                         break;
1614                 case BTRFS_INODE_REF_KEY:
1615                         ret = process_inode_ref(eb, i, &key, active_node);
1616                         break;
1617                 case BTRFS_INODE_EXTREF_KEY:
1618                         ret = process_inode_extref(eb, i, &key, active_node);
1619                         break;
1620                 case BTRFS_INODE_ITEM_KEY:
1621                         ret = process_inode_item(eb, i, &key, active_node);
1622                         break;
1623                 case BTRFS_EXTENT_DATA_KEY:
1624                         ret = process_file_extent(root, eb, i, &key,
1625                                                   active_node);
1626                         break;
1627                 default:
1628                         break;
1629                 };
1630         }
1631         return ret;
1632 }
1633
1634 static void reada_walk_down(struct btrfs_root *root,
1635                             struct extent_buffer *node, int slot)
1636 {
1637         u64 bytenr;
1638         u64 ptr_gen;
1639         u32 nritems;
1640         u32 blocksize;
1641         int i;
1642         int level;
1643
1644         level = btrfs_header_level(node);
1645         if (level != 1)
1646                 return;
1647
1648         nritems = btrfs_header_nritems(node);
1649         blocksize = btrfs_level_size(root, level - 1);
1650         for (i = slot; i < nritems; i++) {
1651                 bytenr = btrfs_node_blockptr(node, i);
1652                 ptr_gen = btrfs_node_ptr_generation(node, i);
1653                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1654         }
1655 }
1656
1657 /*
1658  * Check the child node/leaf by the following condition:
1659  * 1. the first item key of the node/leaf should be the same with the one
1660  *    in parent.
1661  * 2. block in parent node should match the child node/leaf.
1662  * 3. generation of parent node and child's header should be consistent.
1663  *
1664  * Or the child node/leaf pointed by the key in parent is not valid.
1665  *
1666  * We hope to check leaf owner too, but since subvol may share leaves,
1667  * which makes leaf owner check not so strong, key check should be
1668  * sufficient enough for that case.
1669  */
1670 static int check_child_node(struct btrfs_root *root,
1671                             struct extent_buffer *parent, int slot,
1672                             struct extent_buffer *child)
1673 {
1674         struct btrfs_key parent_key;
1675         struct btrfs_key child_key;
1676         int ret = 0;
1677
1678         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1679         if (btrfs_header_level(child) == 0)
1680                 btrfs_item_key_to_cpu(child, &child_key, 0);
1681         else
1682                 btrfs_node_key_to_cpu(child, &child_key, 0);
1683
1684         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1685                 ret = -EINVAL;
1686                 fprintf(stderr,
1687                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1688                         parent_key.objectid, parent_key.type, parent_key.offset,
1689                         child_key.objectid, child_key.type, child_key.offset);
1690         }
1691         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1692                 ret = -EINVAL;
1693                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1694                         btrfs_node_blockptr(parent, slot),
1695                         btrfs_header_bytenr(child));
1696         }
1697         if (btrfs_node_ptr_generation(parent, slot) !=
1698             btrfs_header_generation(child)) {
1699                 ret = -EINVAL;
1700                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1701                         btrfs_header_generation(child),
1702                         btrfs_node_ptr_generation(parent, slot));
1703         }
1704         return ret;
1705 }
1706
1707 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1708                           struct walk_control *wc, int *level)
1709 {
1710         enum btrfs_tree_block_status status;
1711         u64 bytenr;
1712         u64 ptr_gen;
1713         struct extent_buffer *next;
1714         struct extent_buffer *cur;
1715         u32 blocksize;
1716         int ret, err = 0;
1717         u64 refs;
1718
1719         WARN_ON(*level < 0);
1720         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1721         ret = btrfs_lookup_extent_info(NULL, root,
1722                                        path->nodes[*level]->start,
1723                                        *level, 1, &refs, NULL);
1724         if (ret < 0) {
1725                 err = ret;
1726                 goto out;
1727         }
1728
1729         if (refs > 1) {
1730                 ret = enter_shared_node(root, path->nodes[*level]->start,
1731                                         refs, wc, *level);
1732                 if (ret > 0) {
1733                         err = ret;
1734                         goto out;
1735                 }
1736         }
1737
1738         while (*level >= 0) {
1739                 WARN_ON(*level < 0);
1740                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1741                 cur = path->nodes[*level];
1742
1743                 if (btrfs_header_level(cur) != *level)
1744                         WARN_ON(1);
1745
1746                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1747                         break;
1748                 if (*level == 0) {
1749                         ret = process_one_leaf(root, cur, wc);
1750                         if (ret < 0)
1751                                 err = ret;
1752                         break;
1753                 }
1754                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1755                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1756                 blocksize = btrfs_level_size(root, *level - 1);
1757                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1758                                                1, &refs, NULL);
1759                 if (ret < 0)
1760                         refs = 0;
1761
1762                 if (refs > 1) {
1763                         ret = enter_shared_node(root, bytenr, refs,
1764                                                 wc, *level - 1);
1765                         if (ret > 0) {
1766                                 path->slots[*level]++;
1767                                 continue;
1768                         }
1769                 }
1770
1771                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1772                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1773                         free_extent_buffer(next);
1774                         reada_walk_down(root, cur, path->slots[*level]);
1775                         next = read_tree_block(root, bytenr, blocksize,
1776                                                ptr_gen);
1777                         if (!extent_buffer_uptodate(next)) {
1778                                 struct btrfs_key node_key;
1779
1780                                 btrfs_node_key_to_cpu(path->nodes[*level],
1781                                                       &node_key,
1782                                                       path->slots[*level]);
1783                                 btrfs_add_corrupt_extent_record(root->fs_info,
1784                                                 &node_key,
1785                                                 path->nodes[*level]->start,
1786                                                 root->leafsize, *level);
1787                                 err = -EIO;
1788                                 goto out;
1789                         }
1790                 }
1791
1792                 ret = check_child_node(root, cur, path->slots[*level], next);
1793                 if (ret) {
1794                         err = ret;
1795                         goto out;
1796                 }
1797
1798                 if (btrfs_is_leaf(next))
1799                         status = btrfs_check_leaf(root, NULL, next);
1800                 else
1801                         status = btrfs_check_node(root, NULL, next);
1802                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1803                         free_extent_buffer(next);
1804                         err = -EIO;
1805                         goto out;
1806                 }
1807
1808                 *level = *level - 1;
1809                 free_extent_buffer(path->nodes[*level]);
1810                 path->nodes[*level] = next;
1811                 path->slots[*level] = 0;
1812         }
1813 out:
1814         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1815         return err;
1816 }
1817
1818 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1819                         struct walk_control *wc, int *level)
1820 {
1821         int i;
1822         struct extent_buffer *leaf;
1823
1824         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1825                 leaf = path->nodes[i];
1826                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1827                         path->slots[i]++;
1828                         *level = i;
1829                         return 0;
1830                 } else {
1831                         free_extent_buffer(path->nodes[*level]);
1832                         path->nodes[*level] = NULL;
1833                         BUG_ON(*level > wc->active_node);
1834                         if (*level == wc->active_node)
1835                                 leave_shared_node(root, wc, *level);
1836                         *level = i + 1;
1837                 }
1838         }
1839         return 1;
1840 }
1841
1842 static int check_root_dir(struct inode_record *rec)
1843 {
1844         struct inode_backref *backref;
1845         int ret = -1;
1846
1847         if (!rec->found_inode_item || rec->errors)
1848                 goto out;
1849         if (rec->nlink != 1 || rec->found_link != 0)
1850                 goto out;
1851         if (list_empty(&rec->backrefs))
1852                 goto out;
1853         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1854         if (!backref->found_inode_ref)
1855                 goto out;
1856         if (backref->index != 0 || backref->namelen != 2 ||
1857             memcmp(backref->name, "..", 2))
1858                 goto out;
1859         if (backref->found_dir_index || backref->found_dir_item)
1860                 goto out;
1861         ret = 0;
1862 out:
1863         return ret;
1864 }
1865
1866 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1867                               struct btrfs_root *root, struct btrfs_path *path,
1868                               struct inode_record *rec)
1869 {
1870         struct btrfs_inode_item *ei;
1871         struct btrfs_key key;
1872         int ret;
1873
1874         key.objectid = rec->ino;
1875         key.type = BTRFS_INODE_ITEM_KEY;
1876         key.offset = (u64)-1;
1877
1878         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1879         if (ret < 0)
1880                 goto out;
1881         if (ret) {
1882                 if (!path->slots[0]) {
1883                         ret = -ENOENT;
1884                         goto out;
1885                 }
1886                 path->slots[0]--;
1887                 ret = 0;
1888         }
1889         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1890         if (key.objectid != rec->ino) {
1891                 ret = -ENOENT;
1892                 goto out;
1893         }
1894
1895         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1896                             struct btrfs_inode_item);
1897         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1898         btrfs_mark_buffer_dirty(path->nodes[0]);
1899         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1900         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
1901                root->root_key.objectid);
1902 out:
1903         btrfs_release_path(path);
1904         return ret;
1905 }
1906
1907 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1908                                     struct btrfs_root *root,
1909                                     struct btrfs_path *path,
1910                                     struct inode_record *rec)
1911 {
1912         int ret;
1913
1914         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
1915         btrfs_release_path(path);
1916         if (!ret)
1917                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1918         return ret;
1919 }
1920
1921 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
1922                                struct btrfs_root *root,
1923                                struct btrfs_path *path,
1924                                struct inode_record *rec)
1925 {
1926         struct btrfs_inode_item *ei;
1927         struct btrfs_key key;
1928         int ret = 0;
1929
1930         key.objectid = rec->ino;
1931         key.type = BTRFS_INODE_ITEM_KEY;
1932         key.offset = 0;
1933
1934         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1935         if (ret) {
1936                 if (ret > 0)
1937                         ret = -ENOENT;
1938                 goto out;
1939         }
1940
1941         /* Since ret == 0, no need to check anything */
1942         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1943                             struct btrfs_inode_item);
1944         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
1945         btrfs_mark_buffer_dirty(path->nodes[0]);
1946         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
1947         printf("reset nbytes for ino %llu root %llu\n",
1948                rec->ino, root->root_key.objectid);
1949 out:
1950         btrfs_release_path(path);
1951         return ret;
1952 }
1953
1954 static int add_missing_dir_index(struct btrfs_root *root,
1955                                  struct cache_tree *inode_cache,
1956                                  struct inode_record *rec,
1957                                  struct inode_backref *backref)
1958 {
1959         struct btrfs_path *path;
1960         struct btrfs_trans_handle *trans;
1961         struct btrfs_dir_item *dir_item;
1962         struct extent_buffer *leaf;
1963         struct btrfs_key key;
1964         struct btrfs_disk_key disk_key;
1965         struct inode_record *dir_rec;
1966         unsigned long name_ptr;
1967         u32 data_size = sizeof(*dir_item) + backref->namelen;
1968         int ret;
1969
1970         path = btrfs_alloc_path();
1971         if (!path)
1972                 return -ENOMEM;
1973
1974         trans = btrfs_start_transaction(root, 1);
1975         if (IS_ERR(trans)) {
1976                 btrfs_free_path(path);
1977                 return PTR_ERR(trans);
1978         }
1979
1980         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
1981                 (unsigned long long)rec->ino);
1982         key.objectid = backref->dir;
1983         key.type = BTRFS_DIR_INDEX_KEY;
1984         key.offset = backref->index;
1985
1986         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
1987         BUG_ON(ret);
1988
1989         leaf = path->nodes[0];
1990         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
1991
1992         disk_key.objectid = cpu_to_le64(rec->ino);
1993         disk_key.type = BTRFS_INODE_ITEM_KEY;
1994         disk_key.offset = 0;
1995
1996         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
1997         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
1998         btrfs_set_dir_data_len(leaf, dir_item, 0);
1999         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2000         name_ptr = (unsigned long)(dir_item + 1);
2001         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2002         btrfs_mark_buffer_dirty(leaf);
2003         btrfs_free_path(path);
2004         btrfs_commit_transaction(trans, root);
2005
2006         backref->found_dir_index = 1;
2007         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2008         if (!dir_rec)
2009                 return 0;
2010         dir_rec->found_size += backref->namelen;
2011         if (dir_rec->found_size == dir_rec->isize &&
2012             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2013                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2014         if (dir_rec->found_size != dir_rec->isize)
2015                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2016
2017         return 0;
2018 }
2019
2020 static int delete_dir_index(struct btrfs_root *root,
2021                             struct cache_tree *inode_cache,
2022                             struct inode_record *rec,
2023                             struct inode_backref *backref)
2024 {
2025         struct btrfs_trans_handle *trans;
2026         struct btrfs_dir_item *di;
2027         struct btrfs_path *path;
2028         int ret = 0;
2029
2030         path = btrfs_alloc_path();
2031         if (!path)
2032                 return -ENOMEM;
2033
2034         trans = btrfs_start_transaction(root, 1);
2035         if (IS_ERR(trans)) {
2036                 btrfs_free_path(path);
2037                 return PTR_ERR(trans);
2038         }
2039
2040
2041         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2042                 (unsigned long long)backref->dir,
2043                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2044                 (unsigned long long)root->objectid);
2045
2046         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2047                                     backref->name, backref->namelen,
2048                                     backref->index, -1);
2049         if (IS_ERR(di)) {
2050                 ret = PTR_ERR(di);
2051                 btrfs_free_path(path);
2052                 btrfs_commit_transaction(trans, root);
2053                 if (ret == -ENOENT)
2054                         return 0;
2055                 return ret;
2056         }
2057
2058         if (!di)
2059                 ret = btrfs_del_item(trans, root, path);
2060         else
2061                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2062         BUG_ON(ret);
2063         btrfs_free_path(path);
2064         btrfs_commit_transaction(trans, root);
2065         return ret;
2066 }
2067
2068 static int create_inode_item(struct btrfs_root *root,
2069                              struct inode_record *rec,
2070                              struct inode_backref *backref, int root_dir)
2071 {
2072         struct btrfs_trans_handle *trans;
2073         struct btrfs_inode_item inode_item;
2074         time_t now = time(NULL);
2075         int ret;
2076
2077         trans = btrfs_start_transaction(root, 1);
2078         if (IS_ERR(trans)) {
2079                 ret = PTR_ERR(trans);
2080                 return ret;
2081         }
2082
2083         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2084                 "be incomplete, please check permissions and content after "
2085                 "the fsck completes.\n", (unsigned long long)root->objectid,
2086                 (unsigned long long)rec->ino);
2087
2088         memset(&inode_item, 0, sizeof(inode_item));
2089         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2090         if (root_dir)
2091                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2092         else
2093                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2094         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2095         if (rec->found_dir_item) {
2096                 if (rec->found_file_extent)
2097                         fprintf(stderr, "root %llu inode %llu has both a dir "
2098                                 "item and extents, unsure if it is a dir or a "
2099                                 "regular file so setting it as a directory\n",
2100                                 (unsigned long long)root->objectid,
2101                                 (unsigned long long)rec->ino);
2102                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2103                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2104         } else if (!rec->found_dir_item) {
2105                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2106                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2107         }
2108         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2109         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2110         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2111         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2112         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2113         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2114         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2115         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2116
2117         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2118         BUG_ON(ret);
2119         btrfs_commit_transaction(trans, root);
2120         return 0;
2121 }
2122
2123 static int repair_inode_backrefs(struct btrfs_root *root,
2124                                  struct inode_record *rec,
2125                                  struct cache_tree *inode_cache,
2126                                  int delete)
2127 {
2128         struct inode_backref *tmp, *backref;
2129         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2130         int ret = 0;
2131         int repaired = 0;
2132
2133         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2134                 if (!delete && rec->ino == root_dirid) {
2135                         if (!rec->found_inode_item) {
2136                                 ret = create_inode_item(root, rec, backref, 1);
2137                                 if (ret)
2138                                         break;
2139                                 repaired++;
2140                         }
2141                 }
2142
2143                 /* Index 0 for root dir's are special, don't mess with it */
2144                 if (rec->ino == root_dirid && backref->index == 0)
2145                         continue;
2146
2147                 if (delete &&
2148                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2149                      (backref->found_dir_index && backref->found_inode_ref &&
2150                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2151                         ret = delete_dir_index(root, inode_cache, rec, backref);
2152                         if (ret)
2153                                 break;
2154                         repaired++;
2155                         list_del(&backref->list);
2156                         free(backref);
2157                 }
2158
2159                 if (!delete && !backref->found_dir_index &&
2160                     backref->found_dir_item && backref->found_inode_ref) {
2161                         ret = add_missing_dir_index(root, inode_cache, rec,
2162                                                     backref);
2163                         if (ret)
2164                                 break;
2165                         repaired++;
2166                         if (backref->found_dir_item &&
2167                             backref->found_dir_index &&
2168                             backref->found_dir_index) {
2169                                 if (!backref->errors &&
2170                                     backref->found_inode_ref) {
2171                                         list_del(&backref->list);
2172                                         free(backref);
2173                                 }
2174                         }
2175                 }
2176
2177                 if (!delete && (!backref->found_dir_index &&
2178                                 !backref->found_dir_item &&
2179                                 backref->found_inode_ref)) {
2180                         struct btrfs_trans_handle *trans;
2181                         struct btrfs_key location;
2182
2183                         ret = check_dir_conflict(root, backref->name,
2184                                                  backref->namelen,
2185                                                  backref->dir,
2186                                                  backref->index);
2187                         if (ret) {
2188                                 /*
2189                                  * let nlink fixing routine to handle it,
2190                                  * which can do it better.
2191                                  */
2192                                 ret = 0;
2193                                 break;
2194                         }
2195                         location.objectid = rec->ino;
2196                         location.type = BTRFS_INODE_ITEM_KEY;
2197                         location.offset = 0;
2198
2199                         trans = btrfs_start_transaction(root, 1);
2200                         if (IS_ERR(trans)) {
2201                                 ret = PTR_ERR(trans);
2202                                 break;
2203                         }
2204                         fprintf(stderr, "adding missing dir index/item pair "
2205                                 "for inode %llu\n",
2206                                 (unsigned long long)rec->ino);
2207                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2208                                                     backref->namelen,
2209                                                     backref->dir, &location,
2210                                                     imode_to_type(rec->imode),
2211                                                     backref->index);
2212                         BUG_ON(ret);
2213                         btrfs_commit_transaction(trans, root);
2214                         repaired++;
2215                 }
2216
2217                 if (!delete && (backref->found_inode_ref &&
2218                                 backref->found_dir_index &&
2219                                 backref->found_dir_item &&
2220                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2221                                 !rec->found_inode_item)) {
2222                         ret = create_inode_item(root, rec, backref, 0);
2223                         if (ret)
2224                                 break;
2225                         repaired++;
2226                 }
2227
2228         }
2229         return ret ? ret : repaired;
2230 }
2231
2232 /*
2233  * To determine the file type for nlink/inode_item repair
2234  *
2235  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2236  * Return -ENOENT if file type is not found.
2237  */
2238 static int find_file_type(struct inode_record *rec, u8 *type)
2239 {
2240         struct inode_backref *backref;
2241
2242         /* For inode item recovered case */
2243         if (rec->found_inode_item) {
2244                 *type = imode_to_type(rec->imode);
2245                 return 0;
2246         }
2247
2248         list_for_each_entry(backref, &rec->backrefs, list) {
2249                 if (backref->found_dir_index || backref->found_dir_item) {
2250                         *type = backref->filetype;
2251                         return 0;
2252                 }
2253         }
2254         return -ENOENT;
2255 }
2256
2257 /*
2258  * To determine the file name for nlink repair
2259  *
2260  * Return 0 if file name is found, set name and namelen.
2261  * Return -ENOENT if file name is not found.
2262  */
2263 static int find_file_name(struct inode_record *rec,
2264                           char *name, int *namelen)
2265 {
2266         struct inode_backref *backref;
2267
2268         list_for_each_entry(backref, &rec->backrefs, list) {
2269                 if (backref->found_dir_index || backref->found_dir_item ||
2270                     backref->found_inode_ref) {
2271                         memcpy(name, backref->name, backref->namelen);
2272                         *namelen = backref->namelen;
2273                         return 0;
2274                 }
2275         }
2276         return -ENOENT;
2277 }
2278
2279 /* Reset the nlink of the inode to the correct one */
2280 static int reset_nlink(struct btrfs_trans_handle *trans,
2281                        struct btrfs_root *root,
2282                        struct btrfs_path *path,
2283                        struct inode_record *rec)
2284 {
2285         struct inode_backref *backref;
2286         struct inode_backref *tmp;
2287         struct btrfs_key key;
2288         struct btrfs_inode_item *inode_item;
2289         int ret = 0;
2290
2291         /* We don't believe this either, reset it and iterate backref */
2292         rec->found_link = 0;
2293
2294         /* Remove all backref including the valid ones */
2295         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2296                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2297                                    backref->index, backref->name,
2298                                    backref->namelen, 0);
2299                 if (ret < 0)
2300                         goto out;
2301
2302                 /* remove invalid backref, so it won't be added back */
2303                 if (!(backref->found_dir_index &&
2304                       backref->found_dir_item &&
2305                       backref->found_inode_ref)) {
2306                         list_del(&backref->list);
2307                         free(backref);
2308                 } else {
2309                         rec->found_link++;
2310                 }
2311         }
2312
2313         /* Set nlink to 0 */
2314         key.objectid = rec->ino;
2315         key.type = BTRFS_INODE_ITEM_KEY;
2316         key.offset = 0;
2317         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2318         if (ret < 0)
2319                 goto out;
2320         if (ret > 0) {
2321                 ret = -ENOENT;
2322                 goto out;
2323         }
2324         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2325                                     struct btrfs_inode_item);
2326         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2327         btrfs_mark_buffer_dirty(path->nodes[0]);
2328         btrfs_release_path(path);
2329
2330         /*
2331          * Add back valid inode_ref/dir_item/dir_index,
2332          * add_link() will handle the nlink inc, so new nlink must be correct
2333          */
2334         list_for_each_entry(backref, &rec->backrefs, list) {
2335                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2336                                      backref->name, backref->namelen,
2337                                      backref->ref_type, &backref->index, 1);
2338                 if (ret < 0)
2339                         goto out;
2340         }
2341 out:
2342         btrfs_release_path(path);
2343         return ret;
2344 }
2345
2346 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2347                                struct btrfs_root *root,
2348                                struct btrfs_path *path,
2349                                struct inode_record *rec)
2350 {
2351         char *dir_name = "lost+found";
2352         char namebuf[BTRFS_NAME_LEN] = {0};
2353         u64 lost_found_ino;
2354         u32 mode = 0700;
2355         u8 type = 0;
2356         int namelen = 0;
2357         int name_recovered = 0;
2358         int type_recovered = 0;
2359         int ret = 0;
2360
2361         /*
2362          * Get file name and type first before these invalid inode ref
2363          * are deleted by remove_all_invalid_backref()
2364          */
2365         name_recovered = !find_file_name(rec, namebuf, &namelen);
2366         type_recovered = !find_file_type(rec, &type);
2367
2368         if (!name_recovered) {
2369                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2370                        rec->ino, rec->ino);
2371                 namelen = count_digits(rec->ino);
2372                 sprintf(namebuf, "%llu", rec->ino);
2373                 name_recovered = 1;
2374         }
2375         if (!type_recovered) {
2376                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2377                        rec->ino);
2378                 type = BTRFS_FT_REG_FILE;
2379                 type_recovered = 1;
2380         }
2381
2382         ret = reset_nlink(trans, root, path, rec);
2383         if (ret < 0) {
2384                 fprintf(stderr,
2385                         "Failed to reset nlink for inode %llu: %s\n",
2386                         rec->ino, strerror(-ret));
2387                 goto out;
2388         }
2389
2390         if (rec->found_link == 0) {
2391                 lost_found_ino = root->highest_inode;
2392                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2393                         ret = -EOVERFLOW;
2394                         goto out;
2395                 }
2396                 lost_found_ino++;
2397                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2398                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2399                                   mode);
2400                 if (ret < 0) {
2401                         fprintf(stderr, "Failed to create '%s' dir: %s",
2402                                 dir_name, strerror(-ret));
2403                         goto out;
2404                 }
2405                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2406                                      namebuf, namelen, type, NULL, 1);
2407                 /*
2408                  * Add ".INO" suffix several times to handle case where
2409                  * "FILENAME.INO" is already taken by another file.
2410                  */
2411                 while (ret == -EEXIST) {
2412                         /*
2413                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2414                          */
2415                         if (namelen + count_digits(rec->ino) + 1 >
2416                             BTRFS_NAME_LEN) {
2417                                 ret = -EFBIG;
2418                                 goto out;
2419                         }
2420                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2421                                  ".%llu", rec->ino);
2422                         namelen += count_digits(rec->ino) + 1;
2423                         ret = btrfs_add_link(trans, root, rec->ino,
2424                                              lost_found_ino, namebuf,
2425                                              namelen, type, NULL, 1);
2426                 }
2427                 if (ret < 0) {
2428                         fprintf(stderr,
2429                                 "Failed to link the inode %llu to %s dir: %s",
2430                                 rec->ino, dir_name, strerror(-ret));
2431                         goto out;
2432                 }
2433                 /*
2434                  * Just increase the found_link, don't actually add the
2435                  * backref. This will make things easier and this inode
2436                  * record will be freed after the repair is done.
2437                  * So fsck will not report problem about this inode.
2438                  */
2439                 rec->found_link++;
2440                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2441                        namelen, namebuf, dir_name);
2442         }
2443         printf("Fixed the nlink of inode %llu\n", rec->ino);
2444 out:
2445         /*
2446          * Clear the flag anyway, or we will loop forever for the same inode
2447          * as it will not be removed from the bad inode list and the dead loop
2448          * happens.
2449          */
2450         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2451         btrfs_release_path(path);
2452         return ret;
2453 }
2454
2455 /*
2456  * Check if there is any normal(reg or prealloc) file extent for given
2457  * ino.
2458  * This is used to determine the file type when neither its dir_index/item or
2459  * inode_item exists.
2460  *
2461  * This will *NOT* report error, if any error happens, just consider it does
2462  * not have any normal file extent.
2463  */
2464 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2465 {
2466         struct btrfs_path *path;
2467         struct btrfs_key key;
2468         struct btrfs_key found_key;
2469         struct btrfs_file_extent_item *fi;
2470         u8 type;
2471         int ret = 0;
2472
2473         path = btrfs_alloc_path();
2474         if (!path)
2475                 goto out;
2476         key.objectid = ino;
2477         key.type = BTRFS_EXTENT_DATA_KEY;
2478         key.offset = 0;
2479
2480         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2481         if (ret < 0) {
2482                 ret = 0;
2483                 goto out;
2484         }
2485         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2486                 ret = btrfs_next_leaf(root, path);
2487                 if (ret) {
2488                         ret = 0;
2489                         goto out;
2490                 }
2491         }
2492         while (1) {
2493                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2494                                       path->slots[0]);
2495                 if (found_key.objectid != ino ||
2496                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2497                         break;
2498                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2499                                     struct btrfs_file_extent_item);
2500                 type = btrfs_file_extent_type(path->nodes[0], fi);
2501                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2502                         ret = 1;
2503                         goto out;
2504                 }
2505         }
2506 out:
2507         btrfs_free_path(path);
2508         return ret;
2509 }
2510
2511 static u32 btrfs_type_to_imode(u8 type)
2512 {
2513         static u32 imode_by_btrfs_type[] = {
2514                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2515                 [BTRFS_FT_DIR]          = S_IFDIR,
2516                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2517                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2518                 [BTRFS_FT_FIFO]         = S_IFIFO,
2519                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2520                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2521         };
2522
2523         return imode_by_btrfs_type[(type)];
2524 }
2525
2526 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2527                                 struct btrfs_root *root,
2528                                 struct btrfs_path *path,
2529                                 struct inode_record *rec)
2530 {
2531         u8 filetype;
2532         u32 mode = 0700;
2533         int type_recovered = 0;
2534         int ret = 0;
2535
2536         printf("Trying to rebuild inode:%llu\n", rec->ino);
2537
2538         type_recovered = !find_file_type(rec, &filetype);
2539
2540         /*
2541          * Try to determine inode type if type not found.
2542          *
2543          * For found regular file extent, it must be FILE.
2544          * For found dir_item/index, it must be DIR.
2545          *
2546          * For undetermined one, use FILE as fallback.
2547          *
2548          * TODO:
2549          * 1. If found backref(inode_index/item is already handled) to it,
2550          *    it must be DIR.
2551          *    Need new inode-inode ref structure to allow search for that.
2552          */
2553         if (!type_recovered) {
2554                 if (rec->found_file_extent &&
2555                     find_normal_file_extent(root, rec->ino)) {
2556                         type_recovered = 1;
2557                         filetype = BTRFS_FT_REG_FILE;
2558                 } else if (rec->found_dir_item) {
2559                         type_recovered = 1;
2560                         filetype = BTRFS_FT_DIR;
2561                 } else if (!list_empty(&rec->orphan_extents)) {
2562                         type_recovered = 1;
2563                         filetype = BTRFS_FT_REG_FILE;
2564                 } else{
2565                         printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2566                                rec->ino);
2567                         type_recovered = 1;
2568                         filetype = BTRFS_FT_REG_FILE;
2569                 }
2570         }
2571
2572         ret = btrfs_new_inode(trans, root, rec->ino,
2573                               mode | btrfs_type_to_imode(filetype));
2574         if (ret < 0)
2575                 goto out;
2576
2577         /*
2578          * Here inode rebuild is done, we only rebuild the inode item,
2579          * don't repair the nlink(like move to lost+found).
2580          * That is the job of nlink repair.
2581          *
2582          * We just fill the record and return
2583          */
2584         rec->found_dir_item = 1;
2585         rec->imode = mode | btrfs_type_to_imode(filetype);
2586         rec->nlink = 0;
2587         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2588         /* Ensure the inode_nlinks repair function will be called */
2589         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2590 out:
2591         return ret;
2592 }
2593
2594 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2595                                       struct btrfs_root *root,
2596                                       struct btrfs_path *path,
2597                                       struct inode_record *rec)
2598 {
2599         struct orphan_data_extent *orphan;
2600         struct orphan_data_extent *tmp;
2601         int ret = 0;
2602
2603         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2604                 /*
2605                  * Check for conflicting file extents
2606                  *
2607                  * Here we don't know whether the extents is compressed or not,
2608                  * so we can only assume it not compressed nor data offset,
2609                  * and use its disk_len as extent length.
2610                  */
2611                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2612                                        orphan->offset, orphan->disk_len, 0);
2613                 btrfs_release_path(path);
2614                 if (ret < 0)
2615                         goto out;
2616                 if (!ret) {
2617                         fprintf(stderr,
2618                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2619                                 orphan->disk_bytenr, orphan->disk_len);
2620                         ret = btrfs_free_extent(trans,
2621                                         root->fs_info->extent_root,
2622                                         orphan->disk_bytenr, orphan->disk_len,
2623                                         0, root->objectid, orphan->objectid,
2624                                         orphan->offset);
2625                         if (ret < 0)
2626                                 goto out;
2627                 }
2628                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2629                                 orphan->offset, orphan->disk_bytenr,
2630                                 orphan->disk_len, orphan->disk_len);
2631                 if (ret < 0)
2632                         goto out;
2633
2634                 /* Update file size info */
2635                 rec->found_size += orphan->disk_len;
2636                 if (rec->found_size == rec->nbytes)
2637                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2638
2639                 /* Update the file extent hole info too */
2640                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2641                                            orphan->disk_len);
2642                 if (ret < 0)
2643                         goto out;
2644                 if (RB_EMPTY_ROOT(&rec->holes))
2645                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2646
2647                 list_del(&orphan->list);
2648                 free(orphan);
2649         }
2650         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2651 out:
2652         return ret;
2653 }
2654
2655 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2656                                         struct btrfs_root *root,
2657                                         struct btrfs_path *path,
2658                                         struct inode_record *rec)
2659 {
2660         struct rb_node *node;
2661         struct file_extent_hole *hole;
2662         int ret = 0;
2663
2664         node = rb_first(&rec->holes);
2665
2666         while (node) {
2667                 hole = rb_entry(node, struct file_extent_hole, node);
2668                 ret = btrfs_punch_hole(trans, root, rec->ino,
2669                                        hole->start, hole->len);
2670                 if (ret < 0)
2671                         goto out;
2672                 ret = del_file_extent_hole(&rec->holes, hole->start,
2673                                            hole->len);
2674                 if (ret < 0)
2675                         goto out;
2676                 if (RB_EMPTY_ROOT(&rec->holes))
2677                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2678                 node = rb_first(&rec->holes);
2679         }
2680         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2681                rec->ino, root->objectid);
2682 out:
2683         return ret;
2684 }
2685
2686 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2687 {
2688         struct btrfs_trans_handle *trans;
2689         struct btrfs_path *path;
2690         int ret = 0;
2691
2692         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2693                              I_ERR_NO_ORPHAN_ITEM |
2694                              I_ERR_LINK_COUNT_WRONG |
2695                              I_ERR_NO_INODE_ITEM |
2696                              I_ERR_FILE_EXTENT_ORPHAN |
2697                              I_ERR_FILE_EXTENT_DISCOUNT|
2698                              I_ERR_FILE_NBYTES_WRONG)))
2699                 return rec->errors;
2700
2701         path = btrfs_alloc_path();
2702         if (!path)
2703                 return -ENOMEM;
2704
2705         /*
2706          * For nlink repair, it may create a dir and add link, so
2707          * 2 for parent(256)'s dir_index and dir_item
2708          * 2 for lost+found dir's inode_item and inode_ref
2709          * 1 for the new inode_ref of the file
2710          * 2 for lost+found dir's dir_index and dir_item for the file
2711          */
2712         trans = btrfs_start_transaction(root, 7);
2713         if (IS_ERR(trans)) {
2714                 btrfs_free_path(path);
2715                 return PTR_ERR(trans);
2716         }
2717
2718         if (rec->errors & I_ERR_NO_INODE_ITEM)
2719                 ret = repair_inode_no_item(trans, root, path, rec);
2720         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2721                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2722         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2723                 ret = repair_inode_discount_extent(trans, root, path, rec);
2724         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2725                 ret = repair_inode_isize(trans, root, path, rec);
2726         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2727                 ret = repair_inode_orphan_item(trans, root, path, rec);
2728         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2729                 ret = repair_inode_nlinks(trans, root, path, rec);
2730         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2731                 ret = repair_inode_nbytes(trans, root, path, rec);
2732         btrfs_commit_transaction(trans, root);
2733         btrfs_free_path(path);
2734         return ret;
2735 }
2736
2737 static int check_inode_recs(struct btrfs_root *root,
2738                             struct cache_tree *inode_cache)
2739 {
2740         struct cache_extent *cache;
2741         struct ptr_node *node;
2742         struct inode_record *rec;
2743         struct inode_backref *backref;
2744         int stage = 0;
2745         int ret = 0;
2746         int err = 0;
2747         u64 error = 0;
2748         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2749
2750         if (btrfs_root_refs(&root->root_item) == 0) {
2751                 if (!cache_tree_empty(inode_cache))
2752                         fprintf(stderr, "warning line %d\n", __LINE__);
2753                 return 0;
2754         }
2755
2756         /*
2757          * We need to record the highest inode number for later 'lost+found'
2758          * dir creation.
2759          * We must select a ino not used/refered by any existing inode, or
2760          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2761          * this may cause 'lost+found' dir has wrong nlinks.
2762          */
2763         cache = last_cache_extent(inode_cache);
2764         if (cache) {
2765                 node = container_of(cache, struct ptr_node, cache);
2766                 rec = node->data;
2767                 if (rec->ino > root->highest_inode)
2768                         root->highest_inode = rec->ino;
2769         }
2770
2771         /*
2772          * We need to repair backrefs first because we could change some of the
2773          * errors in the inode recs.
2774          *
2775          * We also need to go through and delete invalid backrefs first and then
2776          * add the correct ones second.  We do this because we may get EEXIST
2777          * when adding back the correct index because we hadn't yet deleted the
2778          * invalid index.
2779          *
2780          * For example, if we were missing a dir index then the directories
2781          * isize would be wrong, so if we fixed the isize to what we thought it
2782          * would be and then fixed the backref we'd still have a invalid fs, so
2783          * we need to add back the dir index and then check to see if the isize
2784          * is still wrong.
2785          */
2786         while (stage < 3) {
2787                 stage++;
2788                 if (stage == 3 && !err)
2789                         break;
2790
2791                 cache = search_cache_extent(inode_cache, 0);
2792                 while (repair && cache) {
2793                         node = container_of(cache, struct ptr_node, cache);
2794                         rec = node->data;
2795                         cache = next_cache_extent(cache);
2796
2797                         /* Need to free everything up and rescan */
2798                         if (stage == 3) {
2799                                 remove_cache_extent(inode_cache, &node->cache);
2800                                 free(node);
2801                                 free_inode_rec(rec);
2802                                 continue;
2803                         }
2804
2805                         if (list_empty(&rec->backrefs))
2806                                 continue;
2807
2808                         ret = repair_inode_backrefs(root, rec, inode_cache,
2809                                                     stage == 1);
2810                         if (ret < 0) {
2811                                 err = ret;
2812                                 stage = 2;
2813                                 break;
2814                         } if (ret > 0) {
2815                                 err = -EAGAIN;
2816                         }
2817                 }
2818         }
2819         if (err)
2820                 return err;
2821
2822         rec = get_inode_rec(inode_cache, root_dirid, 0);
2823         if (rec) {
2824                 ret = check_root_dir(rec);
2825                 if (ret) {
2826                         fprintf(stderr, "root %llu root dir %llu error\n",
2827                                 (unsigned long long)root->root_key.objectid,
2828                                 (unsigned long long)root_dirid);
2829                         print_inode_error(root, rec);
2830                         error++;
2831                 }
2832         } else {
2833                 if (repair) {
2834                         struct btrfs_trans_handle *trans;
2835
2836                         trans = btrfs_start_transaction(root, 1);
2837                         if (IS_ERR(trans)) {
2838                                 err = PTR_ERR(trans);
2839                                 return err;
2840                         }
2841
2842                         fprintf(stderr,
2843                                 "root %llu missing its root dir, recreating\n",
2844                                 (unsigned long long)root->objectid);
2845
2846                         ret = btrfs_make_root_dir(trans, root, root_dirid);
2847                         BUG_ON(ret);
2848
2849                         btrfs_commit_transaction(trans, root);
2850                         return -EAGAIN;
2851                 }
2852
2853                 fprintf(stderr, "root %llu root dir %llu not found\n",
2854                         (unsigned long long)root->root_key.objectid,
2855                         (unsigned long long)root_dirid);
2856         }
2857
2858         while (1) {
2859                 cache = search_cache_extent(inode_cache, 0);
2860                 if (!cache)
2861                         break;
2862                 node = container_of(cache, struct ptr_node, cache);
2863                 rec = node->data;
2864                 remove_cache_extent(inode_cache, &node->cache);
2865                 free(node);
2866                 if (rec->ino == root_dirid ||
2867                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
2868                         free_inode_rec(rec);
2869                         continue;
2870                 }
2871
2872                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2873                         ret = check_orphan_item(root, rec->ino);
2874                         if (ret == 0)
2875                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2876                         if (can_free_inode_rec(rec)) {
2877                                 free_inode_rec(rec);
2878                                 continue;
2879                         }
2880                 }
2881
2882                 if (!rec->found_inode_item)
2883                         rec->errors |= I_ERR_NO_INODE_ITEM;
2884                 if (rec->found_link != rec->nlink)
2885                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2886                 if (repair) {
2887                         ret = try_repair_inode(root, rec);
2888                         if (ret == 0 && can_free_inode_rec(rec)) {
2889                                 free_inode_rec(rec);
2890                                 continue;
2891                         }
2892                         ret = 0;
2893                 }
2894
2895                 if (!(repair && ret == 0))
2896                         error++;
2897                 print_inode_error(root, rec);
2898                 list_for_each_entry(backref, &rec->backrefs, list) {
2899                         if (!backref->found_dir_item)
2900                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
2901                         if (!backref->found_dir_index)
2902                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
2903                         if (!backref->found_inode_ref)
2904                                 backref->errors |= REF_ERR_NO_INODE_REF;
2905                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
2906                                 " namelen %u name %s filetype %d errors %x",
2907                                 (unsigned long long)backref->dir,
2908                                 (unsigned long long)backref->index,
2909                                 backref->namelen, backref->name,
2910                                 backref->filetype, backref->errors);
2911                         print_ref_error(backref->errors);
2912                 }
2913                 free_inode_rec(rec);
2914         }
2915         return (error > 0) ? -1 : 0;
2916 }
2917
2918 static struct root_record *get_root_rec(struct cache_tree *root_cache,
2919                                         u64 objectid)
2920 {
2921         struct cache_extent *cache;
2922         struct root_record *rec = NULL;
2923         int ret;
2924
2925         cache = lookup_cache_extent(root_cache, objectid, 1);
2926         if (cache) {
2927                 rec = container_of(cache, struct root_record, cache);
2928         } else {
2929                 rec = calloc(1, sizeof(*rec));
2930                 rec->objectid = objectid;
2931                 INIT_LIST_HEAD(&rec->backrefs);
2932                 rec->cache.start = objectid;
2933                 rec->cache.size = 1;
2934
2935                 ret = insert_cache_extent(root_cache, &rec->cache);
2936                 BUG_ON(ret);
2937         }
2938         return rec;
2939 }
2940
2941 static struct root_backref *get_root_backref(struct root_record *rec,
2942                                              u64 ref_root, u64 dir, u64 index,
2943                                              const char *name, int namelen)
2944 {
2945         struct root_backref *backref;
2946
2947         list_for_each_entry(backref, &rec->backrefs, list) {
2948                 if (backref->ref_root != ref_root || backref->dir != dir ||
2949                     backref->namelen != namelen)
2950                         continue;
2951                 if (memcmp(name, backref->name, namelen))
2952                         continue;
2953                 return backref;
2954         }
2955
2956         backref = malloc(sizeof(*backref) + namelen + 1);
2957         memset(backref, 0, sizeof(*backref));
2958         backref->ref_root = ref_root;
2959         backref->dir = dir;
2960         backref->index = index;
2961         backref->namelen = namelen;
2962         memcpy(backref->name, name, namelen);
2963         backref->name[namelen] = '\0';
2964         list_add_tail(&backref->list, &rec->backrefs);
2965         return backref;
2966 }
2967
2968 static void free_root_record(struct cache_extent *cache)
2969 {
2970         struct root_record *rec;
2971         struct root_backref *backref;
2972
2973         rec = container_of(cache, struct root_record, cache);
2974         while (!list_empty(&rec->backrefs)) {
2975                 backref = list_entry(rec->backrefs.next,
2976                                      struct root_backref, list);
2977                 list_del(&backref->list);
2978                 free(backref);
2979         }
2980
2981         kfree(rec);
2982 }
2983
2984 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
2985
2986 static int add_root_backref(struct cache_tree *root_cache,
2987                             u64 root_id, u64 ref_root, u64 dir, u64 index,
2988                             const char *name, int namelen,
2989                             int item_type, int errors)
2990 {
2991         struct root_record *rec;
2992         struct root_backref *backref;
2993
2994         rec = get_root_rec(root_cache, root_id);
2995         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
2996
2997         backref->errors |= errors;
2998
2999         if (item_type != BTRFS_DIR_ITEM_KEY) {
3000                 if (backref->found_dir_index || backref->found_back_ref ||
3001                     backref->found_forward_ref) {
3002                         if (backref->index != index)
3003                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3004                 } else {
3005                         backref->index = index;
3006                 }
3007         }
3008
3009         if (item_type == BTRFS_DIR_ITEM_KEY) {
3010                 if (backref->found_forward_ref)
3011                         rec->found_ref++;
3012                 backref->found_dir_item = 1;
3013         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3014                 backref->found_dir_index = 1;
3015         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3016                 if (backref->found_forward_ref)
3017                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3018                 else if (backref->found_dir_item)
3019                         rec->found_ref++;
3020                 backref->found_forward_ref = 1;
3021         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3022                 if (backref->found_back_ref)
3023                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3024                 backref->found_back_ref = 1;
3025         } else {
3026                 BUG_ON(1);
3027         }
3028
3029         if (backref->found_forward_ref && backref->found_dir_item)
3030                 backref->reachable = 1;
3031         return 0;
3032 }
3033
3034 static int merge_root_recs(struct btrfs_root *root,
3035                            struct cache_tree *src_cache,
3036                            struct cache_tree *dst_cache)
3037 {
3038         struct cache_extent *cache;
3039         struct ptr_node *node;
3040         struct inode_record *rec;
3041         struct inode_backref *backref;
3042         int ret = 0;
3043
3044         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3045                 free_inode_recs_tree(src_cache);
3046                 return 0;
3047         }
3048
3049         while (1) {
3050                 cache = search_cache_extent(src_cache, 0);
3051                 if (!cache)
3052                         break;
3053                 node = container_of(cache, struct ptr_node, cache);
3054                 rec = node->data;
3055                 remove_cache_extent(src_cache, &node->cache);
3056                 free(node);
3057
3058                 ret = is_child_root(root, root->objectid, rec->ino);
3059                 if (ret < 0)
3060                         break;
3061                 else if (ret == 0)
3062                         goto skip;
3063
3064                 list_for_each_entry(backref, &rec->backrefs, list) {
3065                         BUG_ON(backref->found_inode_ref);
3066                         if (backref->found_dir_item)
3067                                 add_root_backref(dst_cache, rec->ino,
3068                                         root->root_key.objectid, backref->dir,
3069                                         backref->index, backref->name,
3070                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3071                                         backref->errors);
3072                         if (backref->found_dir_index)
3073                                 add_root_backref(dst_cache, rec->ino,
3074                                         root->root_key.objectid, backref->dir,
3075                                         backref->index, backref->name,
3076                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3077                                         backref->errors);
3078                 }
3079 skip:
3080                 free_inode_rec(rec);
3081         }
3082         if (ret < 0)
3083                 return ret;
3084         return 0;
3085 }
3086
3087 static int check_root_refs(struct btrfs_root *root,
3088                            struct cache_tree *root_cache)
3089 {
3090         struct root_record *rec;
3091         struct root_record *ref_root;
3092         struct root_backref *backref;
3093         struct cache_extent *cache;
3094         int loop = 1;
3095         int ret;
3096         int error;
3097         int errors = 0;
3098
3099         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3100         rec->found_ref = 1;
3101
3102         /* fixme: this can not detect circular references */
3103         while (loop) {
3104                 loop = 0;
3105                 cache = search_cache_extent(root_cache, 0);
3106                 while (1) {
3107                         if (!cache)
3108                                 break;
3109                         rec = container_of(cache, struct root_record, cache);
3110                         cache = next_cache_extent(cache);
3111
3112                         if (rec->found_ref == 0)
3113                                 continue;
3114
3115                         list_for_each_entry(backref, &rec->backrefs, list) {
3116                                 if (!backref->reachable)
3117                                         continue;
3118
3119                                 ref_root = get_root_rec(root_cache,
3120                                                         backref->ref_root);
3121                                 if (ref_root->found_ref > 0)
3122                                         continue;
3123
3124                                 backref->reachable = 0;
3125                                 rec->found_ref--;
3126                                 if (rec->found_ref == 0)
3127                                         loop = 1;
3128                         }
3129                 }
3130         }
3131
3132         cache = search_cache_extent(root_cache, 0);
3133         while (1) {
3134                 if (!cache)
3135                         break;
3136                 rec = container_of(cache, struct root_record, cache);
3137                 cache = next_cache_extent(cache);
3138
3139                 if (rec->found_ref == 0 &&
3140                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3141                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3142                         ret = check_orphan_item(root->fs_info->tree_root,
3143                                                 rec->objectid);
3144                         if (ret == 0)
3145                                 continue;
3146
3147                         /*
3148                          * If we don't have a root item then we likely just have
3149                          * a dir item in a snapshot for this root but no actual
3150                          * ref key or anything so it's meaningless.
3151                          */
3152                         if (!rec->found_root_item)
3153                                 continue;
3154                         errors++;
3155                         fprintf(stderr, "fs tree %llu not referenced\n",
3156                                 (unsigned long long)rec->objectid);
3157                 }
3158
3159                 error = 0;
3160                 if (rec->found_ref > 0 && !rec->found_root_item)
3161                         error = 1;
3162                 list_for_each_entry(backref, &rec->backrefs, list) {
3163                         if (!backref->found_dir_item)
3164                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3165                         if (!backref->found_dir_index)
3166                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3167                         if (!backref->found_back_ref)
3168                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3169                         if (!backref->found_forward_ref)
3170                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3171                         if (backref->reachable && backref->errors)
3172                                 error = 1;
3173                 }
3174                 if (!error)
3175                         continue;
3176
3177                 errors++;
3178                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3179                         (unsigned long long)rec->objectid, rec->found_ref,
3180                          rec->found_root_item ? "" : "not found");
3181
3182                 list_for_each_entry(backref, &rec->backrefs, list) {
3183                         if (!backref->reachable)
3184                                 continue;
3185                         if (!backref->errors && rec->found_root_item)
3186                                 continue;
3187                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3188                                 " index %llu namelen %u name %s errors %x\n",
3189                                 (unsigned long long)backref->ref_root,
3190                                 (unsigned long long)backref->dir,
3191                                 (unsigned long long)backref->index,
3192                                 backref->namelen, backref->name,
3193                                 backref->errors);
3194                         print_ref_error(backref->errors);
3195                 }
3196         }
3197         return errors > 0 ? 1 : 0;
3198 }
3199
3200 static int process_root_ref(struct extent_buffer *eb, int slot,
3201                             struct btrfs_key *key,
3202                             struct cache_tree *root_cache)
3203 {
3204         u64 dirid;
3205         u64 index;
3206         u32 len;
3207         u32 name_len;
3208         struct btrfs_root_ref *ref;
3209         char namebuf[BTRFS_NAME_LEN];
3210         int error;
3211
3212         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3213
3214         dirid = btrfs_root_ref_dirid(eb, ref);
3215         index = btrfs_root_ref_sequence(eb, ref);
3216         name_len = btrfs_root_ref_name_len(eb, ref);
3217
3218         if (name_len <= BTRFS_NAME_LEN) {
3219                 len = name_len;
3220                 error = 0;
3221         } else {
3222                 len = BTRFS_NAME_LEN;
3223                 error = REF_ERR_NAME_TOO_LONG;
3224         }
3225         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3226
3227         if (key->type == BTRFS_ROOT_REF_KEY) {
3228                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3229                                  index, namebuf, len, key->type, error);
3230         } else {
3231                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3232                                  index, namebuf, len, key->type, error);
3233         }
3234         return 0;
3235 }
3236
3237 static void free_corrupt_block(struct cache_extent *cache)
3238 {
3239         struct btrfs_corrupt_block *corrupt;
3240
3241         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3242         free(corrupt);
3243 }
3244
3245 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3246
3247 /*
3248  * Repair the btree of the given root.
3249  *
3250  * The fix is to remove the node key in corrupt_blocks cache_tree.
3251  * and rebalance the tree.
3252  * After the fix, the btree should be writeable.
3253  */
3254 static int repair_btree(struct btrfs_root *root,
3255                         struct cache_tree *corrupt_blocks)
3256 {
3257         struct btrfs_trans_handle *trans;
3258         struct btrfs_path *path;
3259         struct btrfs_corrupt_block *corrupt;
3260         struct cache_extent *cache;
3261         struct btrfs_key key;
3262         u64 offset;
3263         int level;
3264         int ret = 0;
3265
3266         if (cache_tree_empty(corrupt_blocks))
3267                 return 0;
3268
3269         path = btrfs_alloc_path();
3270         if (!path)
3271                 return -ENOMEM;
3272
3273         trans = btrfs_start_transaction(root, 1);
3274         if (IS_ERR(trans)) {
3275                 ret = PTR_ERR(trans);
3276                 fprintf(stderr, "Error starting transaction: %s\n",
3277                         strerror(-ret));
3278                 goto out_free_path;
3279         }
3280         cache = first_cache_extent(corrupt_blocks);
3281         while (cache) {
3282                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3283                                        cache);
3284                 level = corrupt->level;
3285                 path->lowest_level = level;
3286                 key.objectid = corrupt->key.objectid;
3287                 key.type = corrupt->key.type;
3288                 key.offset = corrupt->key.offset;
3289
3290                 /*
3291                  * Here we don't want to do any tree balance, since it may
3292                  * cause a balance with corrupted brother leaf/node,
3293                  * so ins_len set to 0 here.
3294                  * Balance will be done after all corrupt node/leaf is deleted.
3295                  */
3296                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3297                 if (ret < 0)
3298                         goto out;
3299                 offset = btrfs_node_blockptr(path->nodes[level],
3300                                              path->slots[level]);
3301
3302                 /* Remove the ptr */
3303                 ret = btrfs_del_ptr(trans, root, path, level,
3304                                     path->slots[level]);
3305                 if (ret < 0)
3306                         goto out;
3307                 /*
3308                  * Remove the corresponding extent
3309                  * return value is not concerned.
3310                  */
3311                 btrfs_release_path(path);
3312                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3313                                         0, root->root_key.objectid,
3314                                         level - 1, 0);
3315                 cache = next_cache_extent(cache);
3316         }
3317
3318         /* Balance the btree using btrfs_search_slot() */
3319         cache = first_cache_extent(corrupt_blocks);
3320         while (cache) {
3321                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3322                                        cache);
3323                 memcpy(&key, &corrupt->key, sizeof(key));
3324                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3325                 if (ret < 0)
3326                         goto out;
3327                 /* return will always >0 since it won't find the item */
3328                 ret = 0;
3329                 btrfs_release_path(path);
3330                 cache = next_cache_extent(cache);
3331         }
3332 out:
3333         btrfs_commit_transaction(trans, root);
3334 out_free_path:
3335         btrfs_free_path(path);
3336         return ret;
3337 }
3338
3339 static int check_fs_root(struct btrfs_root *root,
3340                          struct cache_tree *root_cache,
3341                          struct walk_control *wc)
3342 {
3343         int ret = 0;
3344         int err = 0;
3345         int wret;
3346         int level;
3347         struct btrfs_path path;
3348         struct shared_node root_node;
3349         struct root_record *rec;
3350         struct btrfs_root_item *root_item = &root->root_item;
3351         struct cache_tree corrupt_blocks;
3352         struct orphan_data_extent *orphan;
3353         struct orphan_data_extent *tmp;
3354         enum btrfs_tree_block_status status;
3355
3356         /*
3357          * Reuse the corrupt_block cache tree to record corrupted tree block
3358          *
3359          * Unlike the usage in extent tree check, here we do it in a per
3360          * fs/subvol tree base.
3361          */
3362         cache_tree_init(&corrupt_blocks);
3363         root->fs_info->corrupt_blocks = &corrupt_blocks;
3364
3365         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3366                 rec = get_root_rec(root_cache, root->root_key.objectid);
3367                 if (btrfs_root_refs(root_item) > 0)
3368                         rec->found_root_item = 1;
3369         }
3370
3371         btrfs_init_path(&path);
3372         memset(&root_node, 0, sizeof(root_node));
3373         cache_tree_init(&root_node.root_cache);
3374         cache_tree_init(&root_node.inode_cache);
3375
3376         /* Move the orphan extent record to corresponding inode_record */
3377         list_for_each_entry_safe(orphan, tmp,
3378                                  &root->orphan_data_extents, list) {
3379                 struct inode_record *inode;
3380
3381                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3382                                       1);
3383                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3384                 list_move(&orphan->list, &inode->orphan_extents);
3385         }
3386
3387         level = btrfs_header_level(root->node);
3388         memset(wc->nodes, 0, sizeof(wc->nodes));
3389         wc->nodes[level] = &root_node;
3390         wc->active_node = level;
3391         wc->root_level = level;
3392
3393         /* We may not have checked the root block, lets do that now */
3394         if (btrfs_is_leaf(root->node))
3395                 status = btrfs_check_leaf(root, NULL, root->node);
3396         else
3397                 status = btrfs_check_node(root, NULL, root->node);
3398         if (status != BTRFS_TREE_BLOCK_CLEAN)
3399                 return -EIO;
3400
3401         if (btrfs_root_refs(root_item) > 0 ||
3402             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3403                 path.nodes[level] = root->node;
3404                 extent_buffer_get(root->node);
3405                 path.slots[level] = 0;
3406         } else {
3407                 struct btrfs_key key;
3408                 struct btrfs_disk_key found_key;
3409
3410                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3411                 level = root_item->drop_level;
3412                 path.lowest_level = level;
3413                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3414                 if (wret < 0)
3415                         goto skip_walking;
3416                 btrfs_node_key(path.nodes[level], &found_key,
3417                                 path.slots[level]);
3418                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3419                                         sizeof(found_key)));
3420         }
3421
3422         while (1) {
3423                 wret = walk_down_tree(root, &path, wc, &level);
3424                 if (wret < 0)
3425                         ret = wret;
3426                 if (wret != 0)
3427                         break;
3428
3429                 wret = walk_up_tree(root, &path, wc, &level);
3430                 if (wret < 0)
3431                         ret = wret;
3432                 if (wret != 0)
3433                         break;
3434         }
3435 skip_walking:
3436         btrfs_release_path(&path);
3437
3438         if (!cache_tree_empty(&corrupt_blocks)) {
3439                 struct cache_extent *cache;
3440                 struct btrfs_corrupt_block *corrupt;
3441
3442                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3443                        root->root_key.objectid);
3444                 cache = first_cache_extent(&corrupt_blocks);
3445                 while (cache) {
3446                         corrupt = container_of(cache,
3447                                                struct btrfs_corrupt_block,
3448                                                cache);
3449                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3450                                cache->start, corrupt->level,
3451                                corrupt->key.objectid, corrupt->key.type,
3452                                corrupt->key.offset);
3453                         cache = next_cache_extent(cache);
3454                 }
3455                 if (repair) {
3456                         printf("Try to repair the btree for root %llu\n",
3457                                root->root_key.objectid);
3458                         ret = repair_btree(root, &corrupt_blocks);
3459                         if (ret < 0)
3460                                 fprintf(stderr, "Failed to repair btree: %s\n",
3461                                         strerror(-ret));
3462                         if (!ret)
3463                                 printf("Btree for root %llu is fixed\n",
3464                                        root->root_key.objectid);
3465                 }
3466         }
3467
3468         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3469         if (err < 0)
3470                 ret = err;
3471
3472         if (root_node.current) {
3473                 root_node.current->checked = 1;
3474                 maybe_free_inode_rec(&root_node.inode_cache,
3475                                 root_node.current);
3476         }
3477
3478         err = check_inode_recs(root, &root_node.inode_cache);
3479         if (!ret)
3480                 ret = err;
3481
3482         free_corrupt_blocks_tree(&corrupt_blocks);
3483         root->fs_info->corrupt_blocks = NULL;
3484         free_orphan_data_extents(&root->orphan_data_extents);
3485         return ret;
3486 }
3487
3488 static int fs_root_objectid(u64 objectid)
3489 {
3490         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3491             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3492                 return 1;
3493         return is_fstree(objectid);
3494 }
3495
3496 static int check_fs_roots(struct btrfs_root *root,
3497                           struct cache_tree *root_cache)
3498 {
3499         struct btrfs_path path;
3500         struct btrfs_key key;
3501         struct walk_control wc;
3502         struct extent_buffer *leaf, *tree_node;
3503         struct btrfs_root *tmp_root;
3504         struct btrfs_root *tree_root = root->fs_info->tree_root;
3505         int ret;
3506         int err = 0;
3507
3508         /*
3509          * Just in case we made any changes to the extent tree that weren't
3510          * reflected into the free space cache yet.
3511          */
3512         if (repair)
3513                 reset_cached_block_groups(root->fs_info);
3514         memset(&wc, 0, sizeof(wc));
3515         cache_tree_init(&wc.shared);
3516         btrfs_init_path(&path);
3517
3518 again:
3519         key.offset = 0;
3520         key.objectid = 0;
3521         key.type = BTRFS_ROOT_ITEM_KEY;
3522         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3523         if (ret < 0) {
3524                 err = 1;
3525                 goto out;
3526         }
3527         tree_node = tree_root->node;
3528         while (1) {
3529                 if (tree_node != tree_root->node) {
3530                         free_root_recs_tree(root_cache);
3531                         btrfs_release_path(&path);
3532                         goto again;
3533                 }
3534                 leaf = path.nodes[0];
3535                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3536                         ret = btrfs_next_leaf(tree_root, &path);
3537                         if (ret) {
3538                                 if (ret < 0)
3539                                         err = 1;
3540                                 break;
3541                         }
3542                         leaf = path.nodes[0];
3543                 }
3544                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3545                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3546                     fs_root_objectid(key.objectid)) {
3547                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3548                                 tmp_root = btrfs_read_fs_root_no_cache(
3549                                                 root->fs_info, &key);
3550                         } else {
3551                                 key.offset = (u64)-1;
3552                                 tmp_root = btrfs_read_fs_root(
3553                                                 root->fs_info, &key);
3554                         }
3555                         if (IS_ERR(tmp_root)) {
3556                                 err = 1;
3557                                 goto next;
3558                         }
3559                         ret = check_fs_root(tmp_root, root_cache, &wc);
3560                         if (ret == -EAGAIN) {
3561                                 free_root_recs_tree(root_cache);
3562                                 btrfs_release_path(&path);
3563                                 goto again;
3564                         }
3565                         if (ret)
3566                                 err = 1;
3567                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3568                                 btrfs_free_fs_root(tmp_root);
3569                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3570                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3571                         process_root_ref(leaf, path.slots[0], &key,
3572                                          root_cache);
3573                 }
3574 next:
3575                 path.slots[0]++;
3576         }
3577 out:
3578         btrfs_release_path(&path);
3579         if (err)
3580                 free_extent_cache_tree(&wc.shared);
3581         if (!cache_tree_empty(&wc.shared))
3582                 fprintf(stderr, "warning line %d\n", __LINE__);
3583
3584         return err;
3585 }
3586
3587 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3588 {
3589         struct list_head *cur = rec->backrefs.next;
3590         struct extent_backref *back;
3591         struct tree_backref *tback;
3592         struct data_backref *dback;
3593         u64 found = 0;
3594         int err = 0;
3595
3596         while(cur != &rec->backrefs) {
3597                 back = list_entry(cur, struct extent_backref, list);
3598                 cur = cur->next;
3599                 if (!back->found_extent_tree) {
3600                         err = 1;
3601                         if (!print_errs)
3602                                 goto out;
3603                         if (back->is_data) {
3604                                 dback = (struct data_backref *)back;
3605                                 fprintf(stderr, "Backref %llu %s %llu"
3606                                         " owner %llu offset %llu num_refs %lu"
3607                                         " not found in extent tree\n",
3608                                         (unsigned long long)rec->start,
3609                                         back->full_backref ?
3610                                         "parent" : "root",
3611                                         back->full_backref ?
3612                                         (unsigned long long)dback->parent:
3613                                         (unsigned long long)dback->root,
3614                                         (unsigned long long)dback->owner,
3615                                         (unsigned long long)dback->offset,
3616                                         (unsigned long)dback->num_refs);
3617                         } else {
3618                                 tback = (struct tree_backref *)back;
3619                                 fprintf(stderr, "Backref %llu parent %llu"
3620                                         " root %llu not found in extent tree\n",
3621                                         (unsigned long long)rec->start,
3622                                         (unsigned long long)tback->parent,
3623                                         (unsigned long long)tback->root);
3624                         }
3625                 }
3626                 if (!back->is_data && !back->found_ref) {
3627                         err = 1;
3628                         if (!print_errs)
3629                                 goto out;
3630                         tback = (struct tree_backref *)back;
3631                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3632                                 (unsigned long long)rec->start,
3633                                 back->full_backref ? "parent" : "root",
3634                                 back->full_backref ?
3635                                 (unsigned long long)tback->parent :
3636                                 (unsigned long long)tback->root, back);
3637                 }
3638                 if (back->is_data) {
3639                         dback = (struct data_backref *)back;
3640                         if (dback->found_ref != dback->num_refs) {
3641                                 err = 1;
3642                                 if (!print_errs)
3643                                         goto out;
3644                                 fprintf(stderr, "Incorrect local backref count"
3645                                         " on %llu %s %llu owner %llu"
3646                                         " offset %llu found %u wanted %u back %p\n",
3647                                         (unsigned long long)rec->start,
3648                                         back->full_backref ?
3649                                         "parent" : "root",
3650                                         back->full_backref ?
3651                                         (unsigned long long)dback->parent:
3652                                         (unsigned long long)dback->root,
3653                                         (unsigned long long)dback->owner,
3654                                         (unsigned long long)dback->offset,
3655                                         dback->found_ref, dback->num_refs, back);
3656                         }
3657                         if (dback->disk_bytenr != rec->start) {
3658                                 err = 1;
3659                                 if (!print_errs)
3660                                         goto out;
3661                                 fprintf(stderr, "Backref disk bytenr does not"
3662                                         " match extent record, bytenr=%llu, "
3663                                         "ref bytenr=%llu\n",
3664                                         (unsigned long long)rec->start,
3665                                         (unsigned long long)dback->disk_bytenr);
3666                         }
3667
3668                         if (dback->bytes != rec->nr) {
3669                                 err = 1;
3670                                 if (!print_errs)
3671                                         goto out;
3672                                 fprintf(stderr, "Backref bytes do not match "
3673                                         "extent backref, bytenr=%llu, ref "
3674                                         "bytes=%llu, backref bytes=%llu\n",
3675                                         (unsigned long long)rec->start,
3676                                         (unsigned long long)rec->nr,
3677                                         (unsigned long long)dback->bytes);
3678                         }
3679                 }
3680                 if (!back->is_data) {
3681                         found += 1;
3682                 } else {
3683                         dback = (struct data_backref *)back;
3684                         found += dback->found_ref;
3685                 }
3686         }
3687         if (found != rec->refs) {
3688                 err = 1;
3689                 if (!print_errs)
3690                         goto out;
3691                 fprintf(stderr, "Incorrect global backref count "
3692                         "on %llu found %llu wanted %llu\n",
3693                         (unsigned long long)rec->start,
3694                         (unsigned long long)found,
3695                         (unsigned long long)rec->refs);
3696         }
3697 out:
3698         return err;
3699 }
3700
3701 static int free_all_extent_backrefs(struct extent_record *rec)
3702 {
3703         struct extent_backref *back;
3704         struct list_head *cur;
3705         while (!list_empty(&rec->backrefs)) {
3706                 cur = rec->backrefs.next;
3707                 back = list_entry(cur, struct extent_backref, list);
3708                 list_del(cur);
3709                 free(back);
3710         }
3711         return 0;
3712 }
3713
3714 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3715                                      struct cache_tree *extent_cache)
3716 {
3717         struct cache_extent *cache;
3718         struct extent_record *rec;
3719
3720         while (1) {
3721                 cache = first_cache_extent(extent_cache);
3722                 if (!cache)
3723                         break;
3724                 rec = container_of(cache, struct extent_record, cache);
3725                 remove_cache_extent(extent_cache, cache);
3726                 free_all_extent_backrefs(rec);
3727                 free(rec);
3728         }
3729 }
3730
3731 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3732                                  struct extent_record *rec)
3733 {
3734         if (rec->content_checked && rec->owner_ref_checked &&
3735             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3736             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3737             !rec->bad_full_backref) {
3738                 remove_cache_extent(extent_cache, &rec->cache);
3739                 free_all_extent_backrefs(rec);
3740                 list_del_init(&rec->list);
3741                 free(rec);
3742         }
3743         return 0;
3744 }
3745
3746 static int check_owner_ref(struct btrfs_root *root,
3747                             struct extent_record *rec,
3748                             struct extent_buffer *buf)
3749 {
3750         struct extent_backref *node;
3751         struct tree_backref *back;
3752         struct btrfs_root *ref_root;
3753         struct btrfs_key key;
3754         struct btrfs_path path;
3755         struct extent_buffer *parent;
3756         int level;
3757         int found = 0;
3758         int ret;
3759
3760         list_for_each_entry(node, &rec->backrefs, list) {
3761                 if (node->is_data)
3762                         continue;
3763                 if (!node->found_ref)
3764                         continue;
3765                 if (node->full_backref)
3766                         continue;
3767                 back = (struct tree_backref *)node;
3768                 if (btrfs_header_owner(buf) == back->root)
3769                         return 0;
3770         }
3771         BUG_ON(rec->is_root);
3772
3773         /* try to find the block by search corresponding fs tree */
3774         key.objectid = btrfs_header_owner(buf);
3775         key.type = BTRFS_ROOT_ITEM_KEY;
3776         key.offset = (u64)-1;
3777
3778         ref_root = btrfs_read_fs_root(root->fs_info, &key);
3779         if (IS_ERR(ref_root))
3780                 return 1;
3781
3782         level = btrfs_header_level(buf);
3783         if (level == 0)
3784                 btrfs_item_key_to_cpu(buf, &key, 0);
3785         else
3786                 btrfs_node_key_to_cpu(buf, &key, 0);
3787
3788         btrfs_init_path(&path);
3789         path.lowest_level = level + 1;
3790         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3791         if (ret < 0)
3792                 return 0;
3793
3794         parent = path.nodes[level + 1];
3795         if (parent && buf->start == btrfs_node_blockptr(parent,
3796                                                         path.slots[level + 1]))
3797                 found = 1;
3798
3799         btrfs_release_path(&path);
3800         return found ? 0 : 1;
3801 }
3802
3803 static int is_extent_tree_record(struct extent_record *rec)
3804 {
3805         struct list_head *cur = rec->backrefs.next;
3806         struct extent_backref *node;
3807         struct tree_backref *back;
3808         int is_extent = 0;
3809
3810         while(cur != &rec->backrefs) {
3811                 node = list_entry(cur, struct extent_backref, list);
3812                 cur = cur->next;
3813                 if (node->is_data)
3814                         return 0;
3815                 back = (struct tree_backref *)node;
3816                 if (node->full_backref)
3817                         return 0;
3818                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3819                         is_extent = 1;
3820         }
3821         return is_extent;
3822 }
3823
3824
3825 static int record_bad_block_io(struct btrfs_fs_info *info,
3826                                struct cache_tree *extent_cache,
3827                                u64 start, u64 len)
3828 {
3829         struct extent_record *rec;
3830         struct cache_extent *cache;
3831         struct btrfs_key key;
3832
3833         cache = lookup_cache_extent(extent_cache, start, len);
3834         if (!cache)
3835                 return 0;
3836
3837         rec = container_of(cache, struct extent_record, cache);
3838         if (!is_extent_tree_record(rec))
3839                 return 0;
3840
3841         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3842         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
3843 }
3844
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * For a node (non-zero header level) the two btrfs_key_ptr entries are
 * exchanged as a whole.  For a leaf the item data, the item offset/size
 * fields and the item keys of the two slots are exchanged.
 *
 * Returns 0 on success and -ENOMEM if a temporary buffer for the leaf item
 * data cannot be allocated (the block is not modified in that case).
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
                       struct extent_buffer *buf, int slot)
{
        if (btrfs_header_level(buf)) {
                struct btrfs_key_ptr ptr1, ptr2;

                /* Node: read both key pointers, write them back crossed. */
                read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
                                   sizeof(struct btrfs_key_ptr));
                read_extent_buffer(buf, &ptr2,
                                   btrfs_node_key_ptr_offset(slot + 1),
                                   sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr1,
                                    btrfs_node_key_ptr_offset(slot + 1),
                                    sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr2,
                                    btrfs_node_key_ptr_offset(slot),
                                    sizeof(struct btrfs_key_ptr));
                if (slot == 0) {
                        /*
                         * The first key of the node changed, so hand the new
                         * first key to btrfs_fixup_low_keys() starting one
                         * level above this node.
                         */
                        struct btrfs_disk_key key;
                        btrfs_node_key(buf, &key, 0);
                        btrfs_fixup_low_keys(root, path, &key,
                                             btrfs_header_level(buf) + 1);
                }
        } else {
                struct btrfs_item *item1, *item2;
                struct btrfs_key k1, k2;
                char *item1_data, *item2_data;
                u32 item1_offset, item2_offset, item1_size, item2_size;

                /* Leaf: capture keys, offsets and sizes before touching data. */
                item1 = btrfs_item_nr(slot);
                item2 = btrfs_item_nr(slot + 1);
                btrfs_item_key_to_cpu(buf, &k1, slot);
                btrfs_item_key_to_cpu(buf, &k2, slot + 1);
                item1_offset = btrfs_item_offset(buf, item1);
                item2_offset = btrfs_item_offset(buf, item2);
                item1_size = btrfs_item_size(buf, item1);
                item2_size = btrfs_item_size(buf, item2);

                item1_data = malloc(item1_size);
                if (!item1_data)
                        return -ENOMEM;
                item2_data = malloc(item2_size);
                if (!item2_data) {
                        free(item1_data);
                        return -ENOMEM;
                }

                read_extent_buffer(buf, item1_data, item1_offset, item1_size);
                read_extent_buffer(buf, item2_data, item2_offset, item2_size);

                /*
                 * NOTE(review): each copy below takes its length from the
                 * *other* item.  Assuming the last argument is a byte count,
                 * item1_data (item1_size bytes) is read for item2_size bytes
                 * and item2_data (item2_size bytes) for item1_size bytes, so
                 * one of the two copies reads past its temporary buffer
                 * whenever the sizes differ.  Confirm and fix.
                 */
                write_extent_buffer(buf, item1_data, item2_offset, item2_size);
                write_extent_buffer(buf, item2_data, item1_offset, item1_size);
                free(item1_data);
                free(item2_data);

                /*
                 * NOTE(review): the data has just been exchanged between the
                 * two locations and now the offset/size fields are exchanged
                 * as well, so each slot appears to end up pointing at the
                 * bytes it held before, only relocated.  Verify that this is
                 * the intended result.
                 */
                btrfs_set_item_offset(buf, item1, item2_offset);
                btrfs_set_item_offset(buf, item2, item1_offset);
                btrfs_set_item_size(buf, item1, item2_size);
                btrfs_set_item_size(buf, item2, item1_size);

                /*
                 * btrfs_set_item_key_unsafe() is steered through
                 * path->slots[0]; on return the path is left pointing at
                 * slot + 1.
                 */
                path->slots[0] = slot;
                btrfs_set_item_key_unsafe(root, path, &k2);
                path->slots[0] = slot + 1;
                btrfs_set_item_key_unsafe(root, path, &k1);
        }
        return 0;
}
3912
3913 static int fix_key_order(struct btrfs_trans_handle *trans,
3914                          struct btrfs_root *root,
3915                          struct btrfs_path *path)
3916 {
3917         struct extent_buffer *buf;
3918         struct btrfs_key k1, k2;
3919         int i;
3920         int level = path->lowest_level;
3921         int ret = -EIO;
3922
3923         buf = path->nodes[level];
3924         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
3925                 if (level) {
3926                         btrfs_node_key_to_cpu(buf, &k1, i);
3927                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
3928                 } else {
3929                         btrfs_item_key_to_cpu(buf, &k1, i);
3930                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
3931                 }
3932                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
3933                         continue;
3934                 ret = swap_values(root, path, buf, i);
3935                 if (ret)
3936                         break;
3937                 btrfs_mark_buffer_dirty(buf);
3938                 i = 0;
3939         }
3940         return ret;
3941 }
3942
3943 static int delete_bogus_item(struct btrfs_trans_handle *trans,
3944                              struct btrfs_root *root,
3945                              struct btrfs_path *path,
3946                              struct extent_buffer *buf, int slot)
3947 {
3948         struct btrfs_key key;
3949         int nritems = btrfs_header_nritems(buf);
3950
3951         btrfs_item_key_to_cpu(buf, &key, slot);
3952
3953         /* These are all the keys we can deal with missing. */
3954         if (key.type != BTRFS_DIR_INDEX_KEY &&
3955             key.type != BTRFS_EXTENT_ITEM_KEY &&
3956             key.type != BTRFS_METADATA_ITEM_KEY &&
3957             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
3958             key.type != BTRFS_EXTENT_DATA_REF_KEY)
3959                 return -1;
3960
3961         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
3962                (unsigned long long)key.objectid, key.type,
3963                (unsigned long long)key.offset, slot, buf->start);
3964         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
3965                               btrfs_item_nr_offset(slot + 1),
3966                               sizeof(struct btrfs_item) *
3967                               (nritems - slot - 1));
3968         btrfs_set_header_nritems(buf, nritems - 1);
3969         if (slot == 0) {
3970                 struct btrfs_disk_key disk_key;
3971
3972                 btrfs_item_key(buf, &disk_key, 0);
3973                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
3974         }
3975         btrfs_mark_buffer_dirty(buf);
3976         return 0;
3977 }
3978
3979 static int fix_item_offset(struct btrfs_trans_handle *trans,
3980                            struct btrfs_root *root,
3981                            struct btrfs_path *path)
3982 {
3983         struct extent_buffer *buf;
3984         int i;
3985         int ret = 0;
3986
3987         /* We should only get this for leaves */
3988         BUG_ON(path->lowest_level);
3989         buf = path->nodes[0];
3990 again:
3991         for (i = 0; i < btrfs_header_nritems(buf); i++) {
3992                 unsigned int shift = 0, offset;
3993
3994                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
3995                     BTRFS_LEAF_DATA_SIZE(root)) {
3996                         if (btrfs_item_end_nr(buf, i) >
3997                             BTRFS_LEAF_DATA_SIZE(root)) {
3998                                 ret = delete_bogus_item(trans, root, path,
3999                                                         buf, i);
4000                                 if (!ret)
4001                                         goto again;
4002                                 fprintf(stderr, "item is off the end of the "
4003                                         "leaf, can't fix\n");
4004                                 ret = -EIO;
4005                                 break;
4006                         }
4007                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4008                                 btrfs_item_end_nr(buf, i);
4009                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4010                            btrfs_item_offset_nr(buf, i - 1)) {
4011                         if (btrfs_item_end_nr(buf, i) >
4012                             btrfs_item_offset_nr(buf, i - 1)) {
4013                                 ret = delete_bogus_item(trans, root, path,
4014                                                         buf, i);
4015                                 if (!ret)
4016                                         goto again;
4017                                 fprintf(stderr, "items overlap, can't fix\n");
4018                                 ret = -EIO;
4019                                 break;
4020                         }
4021                         shift = btrfs_item_offset_nr(buf, i - 1) -
4022                                 btrfs_item_end_nr(buf, i);
4023                 }
4024                 if (!shift)
4025                         continue;
4026
4027                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4028                        i, shift, (unsigned long long)buf->start);
4029                 offset = btrfs_item_offset_nr(buf, i);
4030                 memmove_extent_buffer(buf,
4031                                       btrfs_leaf_data(buf) + offset + shift,
4032                                       btrfs_leaf_data(buf) + offset,
4033                                       btrfs_item_size_nr(buf, i));
4034                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4035                                       offset + shift);
4036                 btrfs_mark_buffer_dirty(buf);
4037         }
4038
4039         /*
4040          * We may have moved things, in which case we want to exit so we don't
4041          * write those changes out.  Once we have proper abort functionality in
4042          * progs this can be changed to something nicer.
4043          */
4044         BUG_ON(ret);
4045         return ret;
4046 }
4047
4048 /*
4049  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4050  * then just return -EIO.
4051  */
4052 static int try_to_fix_bad_block(struct btrfs_root *root,
4053                                 struct extent_buffer *buf,
4054                                 enum btrfs_tree_block_status status)
4055 {
4056         struct btrfs_trans_handle *trans;
4057         struct ulist *roots;
4058         struct ulist_node *node;
4059         struct btrfs_root *search_root;
4060         struct btrfs_path *path;
4061         struct ulist_iterator iter;
4062         struct btrfs_key root_key, key;
4063         int ret;
4064
4065         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4066             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4067                 return -EIO;
4068
4069         path = btrfs_alloc_path();
4070         if (!path)
4071                 return -EIO;
4072
4073         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4074                                    0, &roots);
4075         if (ret) {
4076                 btrfs_free_path(path);
4077                 return -EIO;
4078         }
4079
4080         ULIST_ITER_INIT(&iter);
4081         while ((node = ulist_next(roots, &iter))) {
4082                 root_key.objectid = node->val;
4083                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4084                 root_key.offset = (u64)-1;
4085
4086                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4087                 if (IS_ERR(root)) {
4088                         ret = -EIO;
4089                         break;
4090                 }
4091
4092
4093                 trans = btrfs_start_transaction(search_root, 0);
4094                 if (IS_ERR(trans)) {
4095                         ret = PTR_ERR(trans);
4096                         break;
4097                 }
4098
4099                 path->lowest_level = btrfs_header_level(buf);
4100                 path->skip_check_block = 1;
4101                 if (path->lowest_level)
4102                         btrfs_node_key_to_cpu(buf, &key, 0);
4103                 else
4104                         btrfs_item_key_to_cpu(buf, &key, 0);
4105                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4106                 if (ret) {
4107                         ret = -EIO;
4108                         btrfs_commit_transaction(trans, search_root);
4109                         break;
4110                 }
4111                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4112                         ret = fix_key_order(trans, search_root, path);
4113                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4114                         ret = fix_item_offset(trans, search_root, path);
4115                 if (ret) {
4116                         btrfs_commit_transaction(trans, search_root);
4117                         break;
4118                 }
4119                 btrfs_release_path(path);
4120                 btrfs_commit_transaction(trans, search_root);
4121         }
4122         ulist_free(roots);
4123         btrfs_free_path(path);
4124         return ret;
4125 }
4126
4127 static int check_block(struct btrfs_root *root,
4128                        struct cache_tree *extent_cache,
4129                        struct extent_buffer *buf, u64 flags)
4130 {
4131         struct extent_record *rec;
4132         struct cache_extent *cache;
4133         struct btrfs_key key;
4134         enum btrfs_tree_block_status status;
4135         int ret = 0;
4136         int level;
4137
4138         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4139         if (!cache)
4140                 return 1;
4141         rec = container_of(cache, struct extent_record, cache);
4142         rec->generation = btrfs_header_generation(buf);
4143
4144         level = btrfs_header_level(buf);
4145         if (btrfs_header_nritems(buf) > 0) {
4146
4147                 if (level == 0)
4148                         btrfs_item_key_to_cpu(buf, &key, 0);
4149                 else
4150                         btrfs_node_key_to_cpu(buf, &key, 0);
4151
4152                 rec->info_objectid = key.objectid;
4153         }
4154         rec->info_level = level;
4155
4156         if (btrfs_is_leaf(buf))
4157                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4158         else
4159                 status = btrfs_check_node(root, &rec->parent_key, buf);
4160
4161         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4162                 if (repair)
4163                         status = try_to_fix_bad_block(root, buf, status);
4164                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4165                         ret = -EIO;
4166                         fprintf(stderr, "bad block %llu\n",
4167                                 (unsigned long long)buf->start);
4168                 } else {
4169                         /*
4170                          * Signal to callers we need to start the scan over
4171                          * again since we'll have cow'ed blocks.
4172                          */
4173                         ret = -EAGAIN;
4174                 }
4175         } else {
4176                 rec->content_checked = 1;
4177                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4178                         rec->owner_ref_checked = 1;
4179                 else {
4180                         ret = check_owner_ref(root, rec, buf);
4181                         if (!ret)
4182                                 rec->owner_ref_checked = 1;
4183                 }
4184         }
4185         if (!ret)
4186                 maybe_free_extent_rec(extent_cache, rec);
4187         return ret;
4188 }
4189
4190 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4191                                                 u64 parent, u64 root)
4192 {
4193         struct list_head *cur = rec->backrefs.next;
4194         struct extent_backref *node;
4195         struct tree_backref *back;
4196
4197         while(cur != &rec->backrefs) {
4198                 node = list_entry(cur, struct extent_backref, list);
4199                 cur = cur->next;
4200                 if (node->is_data)
4201                         continue;
4202                 back = (struct tree_backref *)node;
4203                 if (parent > 0) {
4204                         if (!node->full_backref)
4205                                 continue;
4206                         if (parent == back->parent)
4207                                 return back;
4208                 } else {
4209                         if (node->full_backref)
4210                                 continue;
4211                         if (back->root == root)
4212                                 return back;
4213                 }
4214         }
4215         return NULL;
4216 }
4217
4218 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4219                                                 u64 parent, u64 root)
4220 {
4221         struct tree_backref *ref = malloc(sizeof(*ref));
4222         memset(&ref->node, 0, sizeof(ref->node));
4223         if (parent > 0) {
4224                 ref->parent = parent;
4225                 ref->node.full_backref = 1;
4226         } else {
4227                 ref->root = root;
4228                 ref->node.full_backref = 0;
4229         }
4230         list_add_tail(&ref->node.list, &rec->backrefs);
4231
4232         return ref;
4233 }
4234
4235 static struct data_backref *find_data_backref(struct extent_record *rec,
4236                                                 u64 parent, u64 root,
4237                                                 u64 owner, u64 offset,
4238                                                 int found_ref,
4239                                                 u64 disk_bytenr, u64 bytes)
4240 {
4241         struct list_head *cur = rec->backrefs.next;
4242         struct extent_backref *node;
4243         struct data_backref *back;
4244
4245         while(cur != &rec->backrefs) {
4246                 node = list_entry(cur, struct extent_backref, list);
4247                 cur = cur->next;
4248                 if (!node->is_data)
4249                         continue;
4250                 back = (struct data_backref *)node;
4251                 if (parent > 0) {
4252                         if (!node->full_backref)
4253                                 continue;
4254                         if (parent == back->parent)
4255                                 return back;
4256                 } else {
4257                         if (node->full_backref)
4258                                 continue;
4259                         if (back->root == root && back->owner == owner &&
4260                             back->offset == offset) {
4261                                 if (found_ref && node->found_ref &&
4262                                     (back->bytes != bytes ||
4263                                     back->disk_bytenr != disk_bytenr))
4264                                         continue;
4265                                 return back;
4266                         }
4267                 }
4268         }
4269         return NULL;
4270 }
4271
4272 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4273                                                 u64 parent, u64 root,
4274                                                 u64 owner, u64 offset,
4275                                                 u64 max_size)
4276 {
4277         struct data_backref *ref = malloc(sizeof(*ref));
4278         memset(&ref->node, 0, sizeof(ref->node));
4279         ref->node.is_data = 1;
4280
4281         if (parent > 0) {
4282                 ref->parent = parent;
4283                 ref->owner = 0;
4284                 ref->offset = 0;
4285                 ref->node.full_backref = 1;
4286         } else {
4287                 ref->root = root;
4288                 ref->owner = owner;
4289                 ref->offset = offset;
4290                 ref->node.full_backref = 0;
4291         }
4292         ref->bytes = max_size;
4293         ref->found_ref = 0;
4294         ref->num_refs = 0;
4295         list_add_tail(&ref->node.list, &rec->backrefs);
4296         if (max_size > rec->max_size)
4297                 rec->max_size = max_size;
4298         return ref;
4299 }
4300
4301 static int add_extent_rec(struct cache_tree *extent_cache,
4302                           struct btrfs_key *parent_key, u64 parent_gen,
4303                           u64 start, u64 nr, u64 extent_item_refs,
4304                           int is_root, int inc_ref, int set_checked,
4305                           int metadata, int extent_rec, u64 max_size)
4306 {
4307         struct extent_record *rec;
4308         struct cache_extent *cache;
4309         int ret = 0;
4310         int dup = 0;
4311
4312         cache = lookup_cache_extent(extent_cache, start, nr);
4313         if (cache) {
4314                 rec = container_of(cache, struct extent_record, cache);
4315                 if (inc_ref)
4316                         rec->refs++;
4317                 if (rec->nr == 1)
4318                         rec->nr = max(nr, max_size);
4319
4320                 /*
4321                  * We need to make sure to reset nr to whatever the extent
4322                  * record says was the real size, this way we can compare it to
4323                  * the backrefs.
4324                  */
4325                 if (extent_rec) {
4326                         if (start != rec->start || rec->found_rec) {
4327                                 struct extent_record *tmp;
4328
4329                                 dup = 1;
4330                                 if (list_empty(&rec->list))
4331                                         list_add_tail(&rec->list,
4332                                                       &duplicate_extents);
4333
4334                                 /*
4335                                  * We have to do this song and dance in case we
4336                                  * find an extent record that falls inside of
4337                                  * our current extent record but does not have
4338                                  * the same objectid.
4339                                  */
4340                                 tmp = malloc(sizeof(*tmp));
4341                                 if (!tmp)
4342                                         return -ENOMEM;
4343                                 tmp->start = start;
4344                                 tmp->max_size = max_size;
4345                                 tmp->nr = nr;
4346                                 tmp->found_rec = 1;
4347                                 tmp->metadata = metadata;
4348                                 tmp->extent_item_refs = extent_item_refs;
4349                                 INIT_LIST_HEAD(&tmp->list);
4350                                 list_add_tail(&tmp->list, &rec->dups);
4351                                 rec->num_duplicates++;
4352                         } else {
4353                                 rec->nr = nr;
4354                                 rec->found_rec = 1;
4355                         }
4356                 }
4357
4358                 if (extent_item_refs && !dup) {
4359                         if (rec->extent_item_refs) {
4360                                 fprintf(stderr, "block %llu rec "
4361                                         "extent_item_refs %llu, passed %llu\n",
4362                                         (unsigned long long)start,
4363                                         (unsigned long long)
4364                                                         rec->extent_item_refs,
4365                                         (unsigned long long)extent_item_refs);
4366                         }
4367                         rec->extent_item_refs = extent_item_refs;
4368                 }
4369                 if (is_root)
4370                         rec->is_root = 1;
4371                 if (set_checked) {
4372                         rec->content_checked = 1;
4373                         rec->owner_ref_checked = 1;
4374                 }
4375
4376                 if (parent_key)
4377                         btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4378                 if (parent_gen)
4379                         rec->parent_generation = parent_gen;
4380
4381                 if (rec->max_size < max_size)
4382                         rec->max_size = max_size;
4383
4384                 maybe_free_extent_rec(extent_cache, rec);
4385                 return ret;
4386         }
4387         rec = malloc(sizeof(*rec));
4388         rec->start = start;
4389         rec->max_size = max_size;
4390         rec->nr = max(nr, max_size);
4391         rec->found_rec = !!extent_rec;
4392         rec->content_checked = 0;
4393         rec->owner_ref_checked = 0;
4394         rec->num_duplicates = 0;
4395         rec->metadata = metadata;
4396         rec->flag_block_full_backref = -1;
4397         rec->bad_full_backref = 0;
4398         INIT_LIST_HEAD(&rec->backrefs);
4399         INIT_LIST_HEAD(&rec->dups);
4400         INIT_LIST_HEAD(&rec->list);
4401
4402         if (is_root)
4403                 rec->is_root = 1;
4404         else
4405                 rec->is_root = 0;
4406
4407         if (inc_ref)
4408                 rec->refs = 1;
4409         else
4410                 rec->refs = 0;
4411
4412         if (extent_item_refs)
4413                 rec->extent_item_refs = extent_item_refs;
4414         else
4415                 rec->extent_item_refs = 0;
4416
4417         if (parent_key)
4418                 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4419         else
4420                 memset(&rec->parent_key, 0, sizeof(*parent_key));
4421
4422         if (parent_gen)
4423                 rec->parent_generation = parent_gen;
4424         else
4425                 rec->parent_generation = 0;
4426
4427         rec->cache.start = start;
4428         rec->cache.size = nr;
4429         ret = insert_cache_extent(extent_cache, &rec->cache);
4430         BUG_ON(ret);
4431         bytes_used += nr;
4432         if (set_checked) {
4433                 rec->content_checked = 1;
4434                 rec->owner_ref_checked = 1;
4435         }
4436         return ret;
4437 }
4438
4439 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4440                             u64 parent, u64 root, int found_ref)
4441 {
4442         struct extent_record *rec;
4443         struct tree_backref *back;
4444         struct cache_extent *cache;
4445
4446         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4447         if (!cache) {
4448                 add_extent_rec(extent_cache, NULL, 0, bytenr,
4449                                1, 0, 0, 0, 0, 1, 0, 0);
4450                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4451                 if (!cache)
4452                         abort();
4453         }
4454
4455         rec = container_of(cache, struct extent_record, cache);
4456         if (rec->start != bytenr) {
4457                 abort();
4458         }
4459
4460         back = find_tree_backref(rec, parent, root);
4461         if (!back)
4462                 back = alloc_tree_backref(rec, parent, root);
4463
4464         if (found_ref) {
4465                 if (back->node.found_ref) {
4466                         fprintf(stderr, "Extent back ref already exists "
4467                                 "for %llu parent %llu root %llu \n",
4468                                 (unsigned long long)bytenr,
4469                                 (unsigned long long)parent,
4470                                 (unsigned long long)root);
4471                 }
4472                 back->node.found_ref = 1;
4473         } else {
4474                 if (back->node.found_extent_tree) {
4475                         fprintf(stderr, "Extent back ref already exists "
4476                                 "for %llu parent %llu root %llu \n",
4477                                 (unsigned long long)bytenr,
4478                                 (unsigned long long)parent,
4479                                 (unsigned long long)root);
4480                 }
4481                 back->node.found_extent_tree = 1;
4482         }
4483         maybe_free_extent_rec(extent_cache, rec);
4484         return 0;
4485 }
4486
4487 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4488                             u64 parent, u64 root, u64 owner, u64 offset,
4489                             u32 num_refs, int found_ref, u64 max_size)
4490 {
4491         struct extent_record *rec;
4492         struct data_backref *back;
4493         struct cache_extent *cache;
4494
4495         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4496         if (!cache) {
4497                 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
4498                                0, 0, max_size);
4499                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4500                 if (!cache)
4501                         abort();
4502         }
4503
4504         rec = container_of(cache, struct extent_record, cache);
4505         if (rec->max_size < max_size)
4506                 rec->max_size = max_size;
4507
4508         /*
4509          * If found_ref is set then max_size is the real size and must match the
4510          * existing refs.  So if we have already found a ref then we need to
4511          * make sure that this ref matches the existing one, otherwise we need
4512          * to add a new backref so we can notice that the backrefs don't match
4513          * and we need to figure out who is telling the truth.  This is to
4514          * account for that awful fsync bug I introduced where we'd end up with
4515          * a btrfs_file_extent_item that would have its length include multiple
4516          * prealloc extents or point inside of a prealloc extent.
4517          */
4518         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4519                                  bytenr, max_size);
4520         if (!back)
4521                 back = alloc_data_backref(rec, parent, root, owner, offset,
4522                                           max_size);
4523
4524         if (found_ref) {
4525                 BUG_ON(num_refs != 1);
4526                 if (back->node.found_ref)
4527                         BUG_ON(back->bytes != max_size);
4528                 back->node.found_ref = 1;
4529                 back->found_ref += 1;
4530                 back->bytes = max_size;
4531                 back->disk_bytenr = bytenr;
4532                 rec->refs += 1;
4533                 rec->content_checked = 1;
4534                 rec->owner_ref_checked = 1;
4535         } else {
4536                 if (back->node.found_extent_tree) {
4537                         fprintf(stderr, "Extent back ref already exists "
4538                                 "for %llu parent %llu root %llu "
4539                                 "owner %llu offset %llu num_refs %lu\n",
4540                                 (unsigned long long)bytenr,
4541                                 (unsigned long long)parent,
4542                                 (unsigned long long)root,
4543                                 (unsigned long long)owner,
4544                                 (unsigned long long)offset,
4545                                 (unsigned long)num_refs);
4546                 }
4547                 back->num_refs = num_refs;
4548                 back->node.found_extent_tree = 1;
4549         }
4550         maybe_free_extent_rec(extent_cache, rec);
4551         return 0;
4552 }
4553
4554 static int add_pending(struct cache_tree *pending,
4555                        struct cache_tree *seen, u64 bytenr, u32 size)
4556 {
4557         int ret;
4558         ret = add_cache_extent(seen, bytenr, size);
4559         if (ret)
4560                 return ret;
4561         add_cache_extent(pending, bytenr, size);
4562         return 0;
4563 }
4564
4565 static int pick_next_pending(struct cache_tree *pending,
4566                         struct cache_tree *reada,
4567                         struct cache_tree *nodes,
4568                         u64 last, struct block_info *bits, int bits_nr,
4569                         int *reada_bits)
4570 {
4571         unsigned long node_start = last;
4572         struct cache_extent *cache;
4573         int ret;
4574
4575         cache = search_cache_extent(reada, 0);
4576         if (cache) {
4577                 bits[0].start = cache->start;
4578                 bits[0].size = cache->size;
4579                 *reada_bits = 1;
4580                 return 1;
4581         }
4582         *reada_bits = 0;
4583         if (node_start > 32768)
4584                 node_start -= 32768;
4585
4586         cache = search_cache_extent(nodes, node_start);
4587         if (!cache)
4588                 cache = search_cache_extent(nodes, 0);
4589
4590         if (!cache) {
4591                  cache = search_cache_extent(pending, 0);
4592                  if (!cache)
4593                          return 0;
4594                  ret = 0;
4595                  do {
4596                          bits[ret].start = cache->start;
4597                          bits[ret].size = cache->size;
4598                          cache = next_cache_extent(cache);
4599                          ret++;
4600                  } while (cache && ret < bits_nr);
4601                  return ret;
4602         }
4603
4604         ret = 0;
4605         do {
4606                 bits[ret].start = cache->start;
4607                 bits[ret].size = cache->size;
4608                 cache = next_cache_extent(cache);
4609                 ret++;
4610         } while (cache && ret < bits_nr);
4611
4612         if (bits_nr - ret > 8) {
4613                 u64 lookup = bits[0].start + bits[0].size;
4614                 struct cache_extent *next;
4615                 next = search_cache_extent(pending, lookup);
4616                 while(next) {
4617                         if (next->start - lookup > 32768)
4618                                 break;
4619                         bits[ret].start = next->start;
4620                         bits[ret].size = next->size;
4621                         lookup = next->start + next->size;
4622                         ret++;
4623                         if (ret == bits_nr)
4624                                 break;
4625                         next = next_cache_extent(next);
4626                         if (!next)
4627                                 break;
4628                 }
4629         }
4630         return ret;
4631 }
4632
4633 static void free_chunk_record(struct cache_extent *cache)
4634 {
4635         struct chunk_record *rec;
4636
4637         rec = container_of(cache, struct chunk_record, cache);
4638         list_del_init(&rec->list);
4639         list_del_init(&rec->dextents);
4640         free(rec);
4641 }
4642
4643 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4644 {
4645         cache_tree_free_extents(chunk_cache, free_chunk_record);
4646 }
4647
4648 static void free_device_record(struct rb_node *node)
4649 {
4650         struct device_record *rec;
4651
4652         rec = container_of(node, struct device_record, node);
4653         free(rec);
4654 }
4655
4656 FREE_RB_BASED_TREE(device_cache, free_device_record);
4657
4658 int insert_block_group_record(struct block_group_tree *tree,
4659                               struct block_group_record *bg_rec)
4660 {
4661         int ret;
4662
4663         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4664         if (ret)
4665                 return ret;
4666
4667         list_add_tail(&bg_rec->list, &tree->block_groups);
4668         return 0;
4669 }
4670
4671 static void free_block_group_record(struct cache_extent *cache)
4672 {
4673         struct block_group_record *rec;
4674
4675         rec = container_of(cache, struct block_group_record, cache);
4676         list_del_init(&rec->list);
4677         free(rec);
4678 }
4679
4680 void free_block_group_tree(struct block_group_tree *tree)
4681 {
4682         cache_tree_free_extents(&tree->tree, free_block_group_record);
4683 }
4684
4685 int insert_device_extent_record(struct device_extent_tree *tree,
4686                                 struct device_extent_record *de_rec)
4687 {
4688         int ret;
4689
4690         /*
4691          * Device extent is a bit different from the other extents, because
4692          * the extents which belong to the different devices may have the
4693          * same start and size, so we need use the special extent cache
4694          * search/insert functions.
4695          */
4696         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4697         if (ret)
4698                 return ret;
4699
4700         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4701         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4702         return 0;
4703 }
4704
4705 static void free_device_extent_record(struct cache_extent *cache)
4706 {
4707         struct device_extent_record *rec;
4708
4709         rec = container_of(cache, struct device_extent_record, cache);
4710         if (!list_empty(&rec->chunk_list))
4711                 list_del_init(&rec->chunk_list);
4712         if (!list_empty(&rec->device_list))
4713                 list_del_init(&rec->device_list);
4714         free(rec);
4715 }
4716
4717 void free_device_extent_tree(struct device_extent_tree *tree)
4718 {
4719         cache_tree_free_extents(&tree->tree, free_device_extent_record);
4720 }
4721
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Record the back reference described by the v0 extent ref item at @slot
 * of @leaf.
 *
 * Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID are recorded as
 * tree backrefs; all others are recorded as data backrefs carrying the
 * item's ref count.  Always returns 0.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID) {
		add_data_backref(extent_cache, key.objectid, key.offset, 0,
				 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
		return 0;
	}

	add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
	return 0;
}
#endif
4740
4741 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4742                                             struct btrfs_key *key,
4743                                             int slot)
4744 {
4745         struct btrfs_chunk *ptr;
4746         struct chunk_record *rec;
4747         int num_stripes, i;
4748
4749         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4750         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4751
4752         rec = malloc(btrfs_chunk_record_size(num_stripes));
4753         if (!rec) {
4754                 fprintf(stderr, "memory allocation failed\n");
4755                 exit(-1);
4756         }
4757
4758         memset(rec, 0, btrfs_chunk_record_size(num_stripes));
4759
4760         INIT_LIST_HEAD(&rec->list);
4761         INIT_LIST_HEAD(&rec->dextents);
4762         rec->bg_rec = NULL;
4763
4764         rec->cache.start = key->offset;
4765         rec->cache.size = btrfs_chunk_length(leaf, ptr);
4766
4767         rec->generation = btrfs_header_generation(leaf);
4768
4769         rec->objectid = key->objectid;
4770         rec->type = key->type;
4771         rec->offset = key->offset;
4772
4773         rec->length = rec->cache.size;
4774         rec->owner = btrfs_chunk_owner(leaf, ptr);
4775         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4776         rec->type_flags = btrfs_chunk_type(leaf, ptr);
4777         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4778         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4779         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4780         rec->num_stripes = num_stripes;
4781         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4782
4783         for (i = 0; i < rec->num_stripes; ++i) {
4784                 rec->stripes[i].devid =
4785                         btrfs_stripe_devid_nr(leaf, ptr, i);
4786                 rec->stripes[i].offset =
4787                         btrfs_stripe_offset_nr(leaf, ptr, i);
4788                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4789                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
4790                                 BTRFS_UUID_SIZE);
4791         }
4792
4793         return rec;
4794 }
4795
4796 static int process_chunk_item(struct cache_tree *chunk_cache,
4797                               struct btrfs_key *key, struct extent_buffer *eb,
4798                               int slot)
4799 {
4800         struct chunk_record *rec;
4801         int ret = 0;
4802
4803         rec = btrfs_new_chunk_record(eb, key, slot);
4804         ret = insert_cache_extent(chunk_cache, &rec->cache);
4805         if (ret) {
4806                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
4807                         rec->offset, rec->length);
4808                 free(rec);
4809         }
4810
4811         return ret;
4812 }
4813
4814 static int process_device_item(struct rb_root *dev_cache,
4815                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
4816 {
4817         struct btrfs_dev_item *ptr;
4818         struct device_record *rec;
4819         int ret = 0;
4820
4821         ptr = btrfs_item_ptr(eb,
4822                 slot, struct btrfs_dev_item);
4823
4824         rec = malloc(sizeof(*rec));
4825         if (!rec) {
4826                 fprintf(stderr, "memory allocation failed\n");
4827                 return -ENOMEM;
4828         }
4829
4830         rec->devid = key->offset;
4831         rec->generation = btrfs_header_generation(eb);
4832
4833         rec->objectid = key->objectid;
4834         rec->type = key->type;
4835         rec->offset = key->offset;
4836
4837         rec->devid = btrfs_device_id(eb, ptr);
4838         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
4839         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
4840
4841         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
4842         if (ret) {
4843                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
4844                 free(rec);
4845         }
4846
4847         return ret;
4848 }
4849
4850 struct block_group_record *
4851 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
4852                              int slot)
4853 {
4854         struct btrfs_block_group_item *ptr;
4855         struct block_group_record *rec;
4856
4857         rec = malloc(sizeof(*rec));
4858         if (!rec) {
4859                 fprintf(stderr, "memory allocation failed\n");
4860                 exit(-1);
4861         }
4862         memset(rec, 0, sizeof(*rec));
4863
4864         rec->cache.start = key->objectid;
4865         rec->cache.size = key->offset;
4866
4867         rec->generation = btrfs_header_generation(leaf);
4868
4869         rec->objectid = key->objectid;
4870         rec->type = key->type;
4871         rec->offset = key->offset;
4872
4873         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
4874         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
4875
4876         INIT_LIST_HEAD(&rec->list);
4877
4878         return rec;
4879 }
4880
4881 static int process_block_group_item(struct block_group_tree *block_group_cache,
4882                                     struct btrfs_key *key,
4883                                     struct extent_buffer *eb, int slot)
4884 {
4885         struct block_group_record *rec;
4886         int ret = 0;
4887
4888         rec = btrfs_new_block_group_record(eb, key, slot);
4889         ret = insert_block_group_record(block_group_cache, rec);
4890         if (ret) {
4891                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
4892                         rec->objectid, rec->offset);
4893                 free(rec);
4894         }
4895
4896         return ret;
4897 }
4898
4899 struct device_extent_record *
4900 btrfs_new_device_extent_record(struct extent_buffer *leaf,
4901                                struct btrfs_key *key, int slot)
4902 {
4903         struct device_extent_record *rec;
4904         struct btrfs_dev_extent *ptr;
4905
4906         rec = malloc(sizeof(*rec));
4907         if (!rec) {
4908                 fprintf(stderr, "memory allocation failed\n");
4909                 exit(-1);
4910         }
4911         memset(rec, 0, sizeof(*rec));
4912
4913         rec->cache.objectid = key->objectid;
4914         rec->cache.start = key->offset;
4915
4916         rec->generation = btrfs_header_generation(leaf);
4917
4918         rec->objectid = key->objectid;
4919         rec->type = key->type;
4920         rec->offset = key->offset;
4921
4922         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
4923         rec->chunk_objecteid =
4924                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
4925         rec->chunk_offset =
4926                 btrfs_dev_extent_chunk_offset(leaf, ptr);
4927         rec->length = btrfs_dev_extent_length(leaf, ptr);
4928         rec->cache.size = rec->length;
4929
4930         INIT_LIST_HEAD(&rec->chunk_list);
4931         INIT_LIST_HEAD(&rec->device_list);
4932
4933         return rec;
4934 }
4935
4936 static int
4937 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
4938                            struct btrfs_key *key, struct extent_buffer *eb,
4939                            int slot)
4940 {
4941         struct device_extent_record *rec;
4942         int ret;
4943
4944         rec = btrfs_new_device_extent_record(eb, key, slot);
4945         ret = insert_device_extent_record(dev_extent_cache, rec);
4946         if (ret) {
4947                 fprintf(stderr,
4948                         "Device extent[%llu, %llu, %llu] existed.\n",
4949                         rec->objectid, rec->offset, rec->length);
4950                 free(rec);
4951         }
4952
4953         return ret;
4954 }
4955
4956 static int process_extent_item(struct btrfs_root *root,
4957                                struct cache_tree *extent_cache,
4958                                struct extent_buffer *eb, int slot)
4959 {
4960         struct btrfs_extent_item *ei;
4961         struct btrfs_extent_inline_ref *iref;
4962         struct btrfs_extent_data_ref *dref;
4963         struct btrfs_shared_data_ref *sref;
4964         struct btrfs_key key;
4965         unsigned long end;
4966         unsigned long ptr;
4967         int type;
4968         u32 item_size = btrfs_item_size_nr(eb, slot);
4969         u64 refs = 0;
4970         u64 offset;
4971         u64 num_bytes;
4972         int metadata = 0;
4973
4974         btrfs_item_key_to_cpu(eb, &key, slot);
4975
4976         if (key.type == BTRFS_METADATA_ITEM_KEY) {
4977                 metadata = 1;
4978                 num_bytes = root->leafsize;
4979         } else {
4980                 num_bytes = key.offset;
4981         }
4982
4983         if (item_size < sizeof(*ei)) {
4984 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4985                 struct btrfs_extent_item_v0 *ei0;
4986                 BUG_ON(item_size != sizeof(*ei0));
4987                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
4988                 refs = btrfs_extent_refs_v0(eb, ei0);
4989 #else
4990                 BUG();
4991 #endif
4992                 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
4993                                       num_bytes, refs, 0, 0, 0, metadata, 1,
4994                                       num_bytes);
4995         }
4996
4997         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
4998         refs = btrfs_extent_refs(eb, ei);
4999
5000         add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5001                        refs, 0, 0, 0, metadata, 1, num_bytes);
5002
5003         ptr = (unsigned long)(ei + 1);
5004         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5005             key.type == BTRFS_EXTENT_ITEM_KEY)
5006                 ptr += sizeof(struct btrfs_tree_block_info);
5007
5008         end = (unsigned long)ei + item_size;
5009         while (ptr < end) {
5010                 iref = (struct btrfs_extent_inline_ref *)ptr;
5011                 type = btrfs_extent_inline_ref_type(eb, iref);
5012                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5013                 switch (type) {
5014                 case BTRFS_TREE_BLOCK_REF_KEY:
5015                         add_tree_backref(extent_cache, key.objectid,
5016                                          0, offset, 0);
5017                         break;
5018                 case BTRFS_SHARED_BLOCK_REF_KEY:
5019                         add_tree_backref(extent_cache, key.objectid,
5020                                          offset, 0, 0);
5021                         break;
5022                 case BTRFS_EXTENT_DATA_REF_KEY:
5023                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5024                         add_data_backref(extent_cache, key.objectid, 0,
5025                                         btrfs_extent_data_ref_root(eb, dref),
5026                                         btrfs_extent_data_ref_objectid(eb,
5027                                                                        dref),
5028                                         btrfs_extent_data_ref_offset(eb, dref),
5029                                         btrfs_extent_data_ref_count(eb, dref),
5030                                         0, num_bytes);
5031                         break;
5032                 case BTRFS_SHARED_DATA_REF_KEY:
5033                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5034                         add_data_backref(extent_cache, key.objectid, offset,
5035                                         0, 0, 0,
5036                                         btrfs_shared_data_ref_count(eb, sref),
5037                                         0, num_bytes);
5038                         break;
5039                 default:
5040                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5041                                 key.objectid, key.type, num_bytes);
5042                         goto out;
5043                 }
5044                 ptr += btrfs_extent_inline_ref_size(type);
5045         }
5046         WARN_ON(ptr > end);
5047 out:
5048         return 0;
5049 }
5050
5051 static int check_cache_range(struct btrfs_root *root,
5052                              struct btrfs_block_group_cache *cache,
5053                              u64 offset, u64 bytes)
5054 {
5055         struct btrfs_free_space *entry;
5056         u64 *logical;
5057         u64 bytenr;
5058         int stripe_len;
5059         int i, nr, ret;
5060
5061         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5062                 bytenr = btrfs_sb_offset(i);
5063                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5064                                        cache->key.objectid, bytenr, 0,
5065                                        &logical, &nr, &stripe_len);
5066                 if (ret)
5067                         return ret;
5068
5069                 while (nr--) {
5070                         if (logical[nr] + stripe_len <= offset)
5071                                 continue;
5072                         if (offset + bytes <= logical[nr])
5073                                 continue;
5074                         if (logical[nr] == offset) {
5075                                 if (stripe_len >= bytes) {
5076                                         kfree(logical);
5077                                         return 0;
5078                                 }
5079                                 bytes -= stripe_len;
5080                                 offset += stripe_len;
5081                         } else if (logical[nr] < offset) {
5082                                 if (logical[nr] + stripe_len >=
5083                                     offset + bytes) {
5084                                         kfree(logical);
5085                                         return 0;
5086                                 }
5087                                 bytes = (offset + bytes) -
5088                                         (logical[nr] + stripe_len);
5089                                 offset = logical[nr] + stripe_len;
5090                         } else {
5091                                 /*
5092                                  * Could be tricky, the super may land in the
5093                                  * middle of the area we're checking.  First
5094                                  * check the easiest case, it's at the end.
5095                                  */
5096                                 if (logical[nr] + stripe_len >=
5097                                     bytes + offset) {
5098                                         bytes = logical[nr] - offset;
5099                                         continue;
5100                                 }
5101
5102                                 /* Check the left side */
5103                                 ret = check_cache_range(root, cache,
5104                                                         offset,
5105                                                         logical[nr] - offset);
5106                                 if (ret) {
5107                                         kfree(logical);
5108                                         return ret;
5109                                 }
5110
5111                                 /* Now we continue with the right side */
5112                                 bytes = (offset + bytes) -
5113                                         (logical[nr] + stripe_len);
5114                                 offset = logical[nr] + stripe_len;
5115                         }
5116                 }
5117
5118                 kfree(logical);
5119         }
5120
5121         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5122         if (!entry) {
5123                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5124                         offset, offset+bytes);
5125                 return -EINVAL;
5126         }
5127
5128         if (entry->offset != offset) {
5129                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5130                         entry->offset);
5131                 return -EINVAL;
5132         }
5133
5134         if (entry->bytes != bytes) {
5135                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5136                         bytes, entry->bytes, offset);
5137                 return -EINVAL;
5138         }
5139
5140         unlink_free_space(cache->free_space_ctl, entry);
5141         free(entry);
5142         return 0;
5143 }
5144
5145 static int verify_space_cache(struct btrfs_root *root,
5146                               struct btrfs_block_group_cache *cache)
5147 {
5148         struct btrfs_path *path;
5149         struct extent_buffer *leaf;
5150         struct btrfs_key key;
5151         u64 last;
5152         int ret = 0;
5153
5154         path = btrfs_alloc_path();
5155         if (!path)
5156                 return -ENOMEM;
5157
5158         root = root->fs_info->extent_root;
5159
5160         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5161
5162         key.objectid = last;
5163         key.offset = 0;
5164         key.type = BTRFS_EXTENT_ITEM_KEY;
5165
5166         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5167         if (ret < 0)
5168                 goto out;
5169         ret = 0;
5170         while (1) {
5171                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5172                         ret = btrfs_next_leaf(root, path);
5173                         if (ret < 0)
5174                                 goto out;
5175                         if (ret > 0) {
5176                                 ret = 0;
5177                                 break;
5178                         }
5179                 }
5180                 leaf = path->nodes[0];
5181                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5182                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5183                         break;
5184                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5185                     key.type != BTRFS_METADATA_ITEM_KEY) {
5186                         path->slots[0]++;
5187                         continue;
5188                 }
5189
5190                 if (last == key.objectid) {
5191                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5192                                 last = key.objectid + key.offset;
5193                         else
5194                                 last = key.objectid + root->leafsize;
5195                         path->slots[0]++;
5196                         continue;
5197                 }
5198
5199                 ret = check_cache_range(root, cache, last,
5200                                         key.objectid - last);
5201                 if (ret)
5202                         break;
5203                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5204                         last = key.objectid + key.offset;
5205                 else
5206                         last = key.objectid + root->leafsize;
5207                 path->slots[0]++;
5208         }
5209
5210         if (last < cache->key.objectid + cache->key.offset)
5211                 ret = check_cache_range(root, cache, last,
5212                                         cache->key.objectid +
5213                                         cache->key.offset - last);
5214
5215 out:
5216         btrfs_free_path(path);
5217
5218         if (!ret &&
5219             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5220                 fprintf(stderr, "There are still entries left in the space "
5221                         "cache\n");
5222                 ret = -EINVAL;
5223         }
5224
5225         return ret;
5226 }
5227
5228 static int check_space_cache(struct btrfs_root *root)
5229 {
5230         struct btrfs_block_group_cache *cache;
5231         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5232         int ret;
5233         int error = 0;
5234
5235         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5236             btrfs_super_generation(root->fs_info->super_copy) !=
5237             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5238                 printf("cache and super generation don't match, space cache "
5239                        "will be invalidated\n");
5240                 return 0;
5241         }
5242
5243         while (1) {
5244                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5245                 if (!cache)
5246                         break;
5247
5248                 start = cache->key.objectid + cache->key.offset;
5249                 if (!cache->free_space_ctl) {
5250                         if (btrfs_init_free_space_ctl(cache,
5251                                                       root->sectorsize)) {
5252                                 ret = -ENOMEM;
5253                                 break;
5254                         }
5255                 } else {
5256                         btrfs_remove_free_space_cache(cache);
5257                 }
5258
5259                 ret = load_free_space_cache(root->fs_info, cache);
5260                 if (!ret)
5261                         continue;
5262
5263                 ret = verify_space_cache(root, cache);
5264                 if (ret) {
5265                         fprintf(stderr, "cache appears valid but isnt %Lu\n",
5266                                 cache->key.objectid);
5267                         error++;
5268                 }
5269         }
5270
5271         return error ? -EINVAL : 0;
5272 }
5273
5274 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5275                         u64 num_bytes, unsigned long leaf_offset,
5276                         struct extent_buffer *eb) {
5277
5278         u64 offset = 0;
5279         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5280         char *data;
5281         unsigned long csum_offset;
5282         u32 csum;
5283         u32 csum_expected;
5284         u64 read_len;
5285         u64 data_checked = 0;
5286         u64 tmp;
5287         int ret = 0;
5288         int mirror;
5289         int num_copies;
5290
5291         if (num_bytes % root->sectorsize)
5292                 return -EINVAL;
5293
5294         data = malloc(num_bytes);
5295         if (!data)
5296                 return -ENOMEM;
5297
5298         while (offset < num_bytes) {
5299                 mirror = 0;
5300 again:
5301                 read_len = num_bytes - offset;
5302                 /* read as much space once a time */
5303                 ret = read_extent_data(root, data + offset,
5304                                 bytenr + offset, &read_len, mirror);
5305                 if (ret)
5306                         goto out;
5307                 data_checked = 0;
5308                 /* verify every 4k data's checksum */
5309                 while (data_checked < read_len) {
5310                         csum = ~(u32)0;
5311                         tmp = offset + data_checked;
5312
5313                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5314                                                csum, root->sectorsize);
5315                         btrfs_csum_final(csum, (char *)&csum);
5316
5317                         csum_offset = leaf_offset +
5318                                  tmp / root->sectorsize * csum_size;
5319                         read_extent_buffer(eb, (char *)&csum_expected,
5320                                            csum_offset, csum_size);
5321                         /* try another mirror */
5322                         if (csum != csum_expected) {
5323                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5324                                                 mirror, bytenr + tmp,
5325                                                 csum, csum_expected);
5326                                 num_copies = btrfs_num_copies(
5327                                                 &root->fs_info->mapping_tree,
5328                                                 bytenr, num_bytes);
5329                                 if (mirror < num_copies - 1) {
5330                                         mirror += 1;
5331                                         goto again;
5332                                 }
5333                         }
5334                         data_checked += root->sectorsize;
5335                 }
5336                 offset += read_len;
5337         }
5338 out:
5339         free(data);
5340         return ret;
5341 }
5342
5343 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5344                                u64 num_bytes)
5345 {
5346         struct btrfs_path *path;
5347         struct extent_buffer *leaf;
5348         struct btrfs_key key;
5349         int ret;
5350
5351         path = btrfs_alloc_path();
5352         if (!path) {
5353                 fprintf(stderr, "Error allocing path\n");
5354                 return -ENOMEM;
5355         }
5356
5357         key.objectid = bytenr;
5358         key.type = BTRFS_EXTENT_ITEM_KEY;
5359         key.offset = (u64)-1;
5360
5361 again:
5362         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5363                                 0, 0);
5364         if (ret < 0) {
5365                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5366                 btrfs_free_path(path);
5367                 return ret;
5368         } else if (ret) {
5369                 if (path->slots[0] > 0) {
5370                         path->slots[0]--;
5371                 } else {
5372                         ret = btrfs_prev_leaf(root, path);
5373                         if (ret < 0) {
5374                                 goto out;
5375                         } else if (ret > 0) {
5376                                 ret = 0;
5377                                 goto out;
5378                         }
5379                 }
5380         }
5381
5382         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5383
5384         /*
5385          * Block group items come before extent items if they have the same
5386          * bytenr, so walk back one more just in case.  Dear future traveler,
5387          * first congrats on mastering time travel.  Now if it's not too much
5388          * trouble could you go back to 2006 and tell Chris to make the
5389          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5390          * EXTENT_ITEM_KEY please?
5391          */
5392         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5393                 if (path->slots[0] > 0) {
5394                         path->slots[0]--;
5395                 } else {
5396                         ret = btrfs_prev_leaf(root, path);
5397                         if (ret < 0) {
5398                                 goto out;
5399                         } else if (ret > 0) {
5400                                 ret = 0;
5401                                 goto out;
5402                         }
5403                 }
5404                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5405         }
5406
5407         while (num_bytes) {
5408                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5409                         ret = btrfs_next_leaf(root, path);
5410                         if (ret < 0) {
5411                                 fprintf(stderr, "Error going to next leaf "
5412                                         "%d\n", ret);
5413                                 btrfs_free_path(path);
5414                                 return ret;
5415                         } else if (ret) {
5416                                 break;
5417                         }
5418                 }
5419                 leaf = path->nodes[0];
5420                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5421                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5422                         path->slots[0]++;
5423                         continue;
5424                 }
5425                 if (key.objectid + key.offset < bytenr) {
5426                         path->slots[0]++;
5427                         continue;
5428                 }
5429                 if (key.objectid > bytenr + num_bytes)
5430                         break;
5431
5432                 if (key.objectid == bytenr) {
5433                         if (key.offset >= num_bytes) {
5434                                 num_bytes = 0;
5435                                 break;
5436                         }
5437                         num_bytes -= key.offset;
5438                         bytenr += key.offset;
5439                 } else if (key.objectid < bytenr) {
5440                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5441                                 num_bytes = 0;
5442                                 break;
5443                         }
5444                         num_bytes = (bytenr + num_bytes) -
5445                                 (key.objectid + key.offset);
5446                         bytenr = key.objectid + key.offset;
5447                 } else {
5448                         if (key.objectid + key.offset < bytenr + num_bytes) {
5449                                 u64 new_start = key.objectid + key.offset;
5450                                 u64 new_bytes = bytenr + num_bytes - new_start;
5451
5452                                 /*
5453                                  * Weird case, the extent is in the middle of
5454                                  * our range, we'll have to search one side
5455                                  * and then the other.  Not sure if this happens
5456                                  * in real life, but no harm in coding it up
5457                                  * anyway just in case.
5458                                  */
5459                                 btrfs_release_path(path);
5460                                 ret = check_extent_exists(root, new_start,
5461                                                           new_bytes);
5462                                 if (ret) {
5463                                         fprintf(stderr, "Right section didn't "
5464                                                 "have a record\n");
5465                                         break;
5466                                 }
5467                                 num_bytes = key.objectid - bytenr;
5468                                 goto again;
5469                         }
5470                         num_bytes = key.objectid - bytenr;
5471                 }
5472                 path->slots[0]++;
5473         }
5474         ret = 0;
5475
5476 out:
5477         if (num_bytes && !ret) {
5478                 fprintf(stderr, "There are no extents for csum range "
5479                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5480                 ret = 1;
5481         }
5482
5483         btrfs_free_path(path);
5484         return ret;
5485 }
5486
5487 static int check_csums(struct btrfs_root *root)
5488 {
5489         struct btrfs_path *path;
5490         struct extent_buffer *leaf;
5491         struct btrfs_key key;
5492         u64 offset = 0, num_bytes = 0;
5493         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5494         int errors = 0;
5495         int ret;
5496         u64 data_len;
5497         unsigned long leaf_offset;
5498
5499         root = root->fs_info->csum_root;
5500         if (!extent_buffer_uptodate(root->node)) {
5501                 fprintf(stderr, "No valid csum tree found\n");
5502                 return -ENOENT;
5503         }
5504
5505         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5506         key.type = BTRFS_EXTENT_CSUM_KEY;
5507         key.offset = 0;
5508
5509         path = btrfs_alloc_path();
5510         if (!path)
5511                 return -ENOMEM;
5512
5513         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5514         if (ret < 0) {
5515                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5516                 btrfs_free_path(path);
5517                 return ret;
5518         }
5519
5520         if (ret > 0 && path->slots[0])
5521                 path->slots[0]--;
5522         ret = 0;
5523
5524         while (1) {
5525                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5526                         ret = btrfs_next_leaf(root, path);
5527                         if (ret < 0) {
5528                                 fprintf(stderr, "Error going to next leaf "
5529                                         "%d\n", ret);
5530                                 break;
5531                         }
5532                         if (ret)
5533                                 break;
5534                 }
5535                 leaf = path->nodes[0];
5536
5537                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5538                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5539                         path->slots[0]++;
5540                         continue;
5541                 }
5542
5543                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5544                               csum_size) * root->sectorsize;
5545                 if (!check_data_csum)
5546                         goto skip_csum_check;
5547                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5548                 ret = check_extent_csums(root, key.offset, data_len,
5549                                          leaf_offset, leaf);
5550                 if (ret)
5551                         break;
5552 skip_csum_check:
5553                 if (!num_bytes) {
5554                         offset = key.offset;
5555                 } else if (key.offset != offset + num_bytes) {
5556                         ret = check_extent_exists(root, offset, num_bytes);
5557                         if (ret) {
5558                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5559                                         "there is no extent record\n",
5560                                         offset, offset+num_bytes);
5561                                 errors++;
5562                         }
5563                         offset = key.offset;
5564                         num_bytes = 0;
5565                 }
5566                 num_bytes += data_len;
5567                 path->slots[0]++;
5568         }
5569
5570         btrfs_free_path(path);
5571         return errors;
5572 }
5573
5574 static int is_dropped_key(struct btrfs_key *key,
5575                           struct btrfs_key *drop_key) {
5576         if (key->objectid < drop_key->objectid)
5577                 return 1;
5578         else if (key->objectid == drop_key->objectid) {
5579                 if (key->type < drop_key->type)
5580                         return 1;
5581                 else if (key->type == drop_key->type) {
5582                         if (key->offset < drop_key->offset)
5583                                 return 1;
5584                 }
5585         }
5586         return 0;
5587 }
5588
5589 /*
5590  * Here are the rules for FULL_BACKREF.
5591  *
5592  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5593  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5594  *      FULL_BACKREF set.
5595  * 3) We cow'ed the block walking down a reloc tree.  This is impossible to tell
5596  *    if it happened after the relocation occurred since we'll have dropped the
5597  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5598  *    have no real way to know for sure.
5599  *
5600  * We process the blocks one root at a time, and we start from the lowest root
5601  * objectid and go to the highest.  So we can just lookup the owner backref for
5602  * the record and if we don't find it then we know it doesn't exist and we have
5603  * a FULL BACKREF.
5604  *
5605  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5606  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5607  * be set or not and then we can check later once we've gathered all the refs.
5608  */
5609 static int calc_extent_flag(struct btrfs_root *root,
5610                            struct cache_tree *extent_cache,
5611                            struct extent_buffer *buf,
5612                            struct root_item_record *ri,
5613                            u64 *flags)
5614 {
5615         struct extent_record *rec;
5616         struct cache_extent *cache;
5617         struct tree_backref *tback;
5618         u64 owner = 0;
5619
5620         cache = lookup_cache_extent(extent_cache, buf->start, 1);
5621         /* we have added this extent before */
5622         BUG_ON(!cache);
5623         rec = container_of(cache, struct extent_record, cache);
5624
5625         /*
5626          * Except file/reloc tree, we can not have
5627          * FULL BACKREF MODE
5628          */
5629         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5630                 goto normal;
5631         /*
5632          * root node
5633          */
5634         if (buf->start == ri->bytenr)
5635                 goto normal;
5636
5637         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5638                 goto full_backref;
5639
5640         owner = btrfs_header_owner(buf);
5641         if (owner == ri->objectid)
5642                 goto normal;
5643
5644         tback = find_tree_backref(rec, 0, owner);
5645         if (!tback)
5646                 goto full_backref;
5647 normal:
5648         *flags = 0;
5649         if (rec->flag_block_full_backref != -1 &&
5650             rec->flag_block_full_backref != 0)
5651                 rec->bad_full_backref = 1;
5652         return 0;
5653 full_backref:
5654         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5655         if (rec->flag_block_full_backref != -1 &&
5656             rec->flag_block_full_backref != 1)
5657                 rec->bad_full_backref = 1;
5658         return 0;
5659 }
5660
5661 static int run_next_block(struct btrfs_root *root,
5662                           struct block_info *bits,
5663                           int bits_nr,
5664                           u64 *last,
5665                           struct cache_tree *pending,
5666                           struct cache_tree *seen,
5667                           struct cache_tree *reada,
5668                           struct cache_tree *nodes,
5669                           struct cache_tree *extent_cache,
5670                           struct cache_tree *chunk_cache,
5671                           struct rb_root *dev_cache,
5672                           struct block_group_tree *block_group_cache,
5673                           struct device_extent_tree *dev_extent_cache,
5674                           struct root_item_record *ri)
5675 {
5676         struct extent_buffer *buf;
5677         struct extent_record *rec = NULL;
5678         u64 bytenr;
5679         u32 size;
5680         u64 parent;
5681         u64 owner;
5682         u64 flags;
5683         u64 ptr;
5684         u64 gen = 0;
5685         int ret = 0;
5686         int i;
5687         int nritems;
5688         struct btrfs_key key;
5689         struct cache_extent *cache;
5690         int reada_bits;
5691
5692         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5693                                     bits_nr, &reada_bits);
5694         if (nritems == 0)
5695                 return 1;
5696
5697         if (!reada_bits) {
5698                 for(i = 0; i < nritems; i++) {
5699                         ret = add_cache_extent(reada, bits[i].start,
5700                                                bits[i].size);
5701                         if (ret == -EEXIST)
5702                                 continue;
5703
5704                         /* fixme, get the parent transid */
5705                         readahead_tree_block(root, bits[i].start,
5706                                              bits[i].size, 0);
5707                 }
5708         }
5709         *last = bits[0].start;
5710         bytenr = bits[0].start;
5711         size = bits[0].size;
5712
5713         cache = lookup_cache_extent(pending, bytenr, size);
5714         if (cache) {
5715                 remove_cache_extent(pending, cache);
5716                 free(cache);
5717         }
5718         cache = lookup_cache_extent(reada, bytenr, size);
5719         if (cache) {
5720                 remove_cache_extent(reada, cache);
5721                 free(cache);
5722         }
5723         cache = lookup_cache_extent(nodes, bytenr, size);
5724         if (cache) {
5725                 remove_cache_extent(nodes, cache);
5726                 free(cache);
5727         }
5728         cache = lookup_cache_extent(extent_cache, bytenr, size);
5729         if (cache) {
5730                 rec = container_of(cache, struct extent_record, cache);
5731                 gen = rec->parent_generation;
5732         }
5733
5734         /* fixme, get the real parent transid */
5735         buf = read_tree_block(root, bytenr, size, gen);
5736         if (!extent_buffer_uptodate(buf)) {
5737                 record_bad_block_io(root->fs_info,
5738                                     extent_cache, bytenr, size);
5739                 goto out;
5740         }
5741
5742         nritems = btrfs_header_nritems(buf);
5743
5744         flags = 0;
5745         if (!init_extent_tree) {
5746                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5747                                        btrfs_header_level(buf), 1, NULL,
5748                                        &flags);
5749                 if (ret < 0) {
5750                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5751                         if (ret < 0) {
5752                                 fprintf(stderr, "Couldn't calc extent flags\n");
5753                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5754                         }
5755                 }
5756         } else {
5757                 flags = 0;
5758                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5759                 if (ret < 0) {
5760                         fprintf(stderr, "Couldn't calc extent flags\n");
5761                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5762                 }
5763         }
5764
5765         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5766                 if (ri != NULL &&
5767                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5768                     ri->objectid == btrfs_header_owner(buf)) {
5769                         /*
5770                          * Ok we got to this block from it's original owner and
5771                          * we have FULL_BACKREF set.  Relocation can leave
5772                          * converted blocks over so this is altogether possible,
5773                          * however it's not possible if the generation > the
5774                          * last snapshot, so check for this case.
5775                          */
5776                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5777                             btrfs_header_generation(buf) > ri->last_snapshot) {
5778                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5779                                 rec->bad_full_backref = 1;
5780                         }
5781                 }
5782         } else {
5783                 if (ri != NULL &&
5784                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5785                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5786                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5787                         rec->bad_full_backref = 1;
5788                 }
5789         }
5790
5791         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5792                 rec->flag_block_full_backref = 1;
5793                 parent = bytenr;
5794                 owner = 0;
5795         } else {
5796                 rec->flag_block_full_backref = 0;
5797                 parent = 0;
5798                 owner = btrfs_header_owner(buf);
5799         }
5800
5801         ret = check_block(root, extent_cache, buf, flags);
5802         if (ret)
5803                 goto out;
5804
5805         if (btrfs_is_leaf(buf)) {
5806                 btree_space_waste += btrfs_leaf_free_space(root, buf);
5807                 for (i = 0; i < nritems; i++) {
5808                         struct btrfs_file_extent_item *fi;
5809                         btrfs_item_key_to_cpu(buf, &key, i);
5810                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
5811                                 process_extent_item(root, extent_cache, buf,
5812                                                     i);
5813                                 continue;
5814                         }
5815                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5816                                 process_extent_item(root, extent_cache, buf,
5817                                                     i);
5818                                 continue;
5819                         }
5820                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
5821                                 total_csum_bytes +=
5822                                         btrfs_item_size_nr(buf, i);
5823                                 continue;
5824                         }
5825                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
5826                                 process_chunk_item(chunk_cache, &key, buf, i);
5827                                 continue;
5828                         }
5829                         if (key.type == BTRFS_DEV_ITEM_KEY) {
5830                                 process_device_item(dev_cache, &key, buf, i);
5831                                 continue;
5832                         }
5833                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
5834                                 process_block_group_item(block_group_cache,
5835                                         &key, buf, i);
5836                                 continue;
5837                         }
5838                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
5839                                 process_device_extent_item(dev_extent_cache,
5840                                         &key, buf, i);
5841                                 continue;
5842
5843                         }
5844                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
5845 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5846                                 process_extent_ref_v0(extent_cache, buf, i);
5847 #else
5848                                 BUG();
5849 #endif
5850                                 continue;
5851                         }
5852
5853                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
5854                                 add_tree_backref(extent_cache, key.objectid, 0,
5855                                                  key.offset, 0);
5856                                 continue;
5857                         }
5858                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
5859                                 add_tree_backref(extent_cache, key.objectid,
5860                                                  key.offset, 0, 0);
5861                                 continue;
5862                         }
5863                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
5864                                 struct btrfs_extent_data_ref *ref;
5865                                 ref = btrfs_item_ptr(buf, i,
5866                                                 struct btrfs_extent_data_ref);
5867                                 add_data_backref(extent_cache,
5868                                         key.objectid, 0,
5869                                         btrfs_extent_data_ref_root(buf, ref),
5870                                         btrfs_extent_data_ref_objectid(buf,
5871                                                                        ref),
5872                                         btrfs_extent_data_ref_offset(buf, ref),
5873                                         btrfs_extent_data_ref_count(buf, ref),
5874                                         0, root->sectorsize);
5875                                 continue;
5876                         }
5877                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
5878                                 struct btrfs_shared_data_ref *ref;
5879                                 ref = btrfs_item_ptr(buf, i,
5880                                                 struct btrfs_shared_data_ref);
5881                                 add_data_backref(extent_cache,
5882                                         key.objectid, key.offset, 0, 0, 0,
5883                                         btrfs_shared_data_ref_count(buf, ref),
5884                                         0, root->sectorsize);
5885                                 continue;
5886                         }
5887                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
5888                                 struct bad_item *bad;
5889
5890                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
5891                                         continue;
5892                                 if (!owner)
5893                                         continue;
5894                                 bad = malloc(sizeof(struct bad_item));
5895                                 if (!bad)
5896                                         continue;
5897                                 INIT_LIST_HEAD(&bad->list);
5898                                 memcpy(&bad->key, &key,
5899                                        sizeof(struct btrfs_key));
5900                                 bad->root_id = owner;
5901                                 list_add_tail(&bad->list, &delete_items);
5902                                 continue;
5903                         }
5904                         if (key.type != BTRFS_EXTENT_DATA_KEY)
5905                                 continue;
5906                         fi = btrfs_item_ptr(buf, i,
5907                                             struct btrfs_file_extent_item);
5908                         if (btrfs_file_extent_type(buf, fi) ==
5909                             BTRFS_FILE_EXTENT_INLINE)
5910                                 continue;
5911                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
5912                                 continue;
5913
5914                         data_bytes_allocated +=
5915                                 btrfs_file_extent_disk_num_bytes(buf, fi);
5916                         if (data_bytes_allocated < root->sectorsize) {
5917                                 abort();
5918                         }
5919                         data_bytes_referenced +=
5920                                 btrfs_file_extent_num_bytes(buf, fi);
5921                         add_data_backref(extent_cache,
5922                                 btrfs_file_extent_disk_bytenr(buf, fi),
5923                                 parent, owner, key.objectid, key.offset -
5924                                 btrfs_file_extent_offset(buf, fi), 1, 1,
5925                                 btrfs_file_extent_disk_num_bytes(buf, fi));
5926                 }
5927         } else {
5928                 int level;
5929                 struct btrfs_key first_key;
5930
5931                 first_key.objectid = 0;
5932
5933                 if (nritems > 0)
5934                         btrfs_item_key_to_cpu(buf, &first_key, 0);
5935                 level = btrfs_header_level(buf);
5936                 for (i = 0; i < nritems; i++) {
5937                         ptr = btrfs_node_blockptr(buf, i);
5938                         size = btrfs_level_size(root, level - 1);
5939                         btrfs_node_key_to_cpu(buf, &key, i);
5940                         if (ri != NULL) {
5941                                 if ((level == ri->drop_level)
5942                                     && is_dropped_key(&key, &ri->drop_key)) {
5943                                         continue;
5944                                 }
5945                         }
5946                         ret = add_extent_rec(extent_cache, &key,
5947                                              btrfs_node_ptr_generation(buf, i),
5948                                              ptr, size, 0, 0, 1, 0, 1, 0,
5949                                              size);
5950                         BUG_ON(ret);
5951
5952                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
5953
5954                         if (level > 1) {
5955                                 add_pending(nodes, seen, ptr, size);
5956                         } else {
5957                                 add_pending(pending, seen, ptr, size);
5958                         }
5959                 }
5960                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
5961                                       nritems) * sizeof(struct btrfs_key_ptr);
5962         }
5963         total_btree_bytes += buf->len;
5964         if (fs_root_objectid(btrfs_header_owner(buf)))
5965                 total_fs_tree_bytes += buf->len;
5966         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
5967                 total_extent_tree_bytes += buf->len;
5968         if (!found_old_backref &&
5969             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
5970             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
5971             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5972                 found_old_backref = 1;
5973 out:
5974         free_extent_buffer(buf);
5975         return ret;
5976 }
5977
5978 static int add_root_to_pending(struct extent_buffer *buf,
5979                                struct cache_tree *extent_cache,
5980                                struct cache_tree *pending,
5981                                struct cache_tree *seen,
5982                                struct cache_tree *nodes,
5983                                u64 objectid)
5984 {
5985         if (btrfs_header_level(buf) > 0)
5986                 add_pending(nodes, seen, buf->start, buf->len);
5987         else
5988                 add_pending(pending, seen, buf->start, buf->len);
5989         add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
5990                        0, 1, 1, 0, 1, 0, buf->len);
5991
5992         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
5993             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
5994                 add_tree_backref(extent_cache, buf->start, buf->start,
5995                                  0, 1);
5996         else
5997                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
5998         return 0;
5999 }
6000
6001 /* as we fix the tree, we might be deleting blocks that
6002  * we're tracking for repair.  This hook makes sure we
6003  * remove any backrefs for blocks as we are fixing them.
6004  */
6005 static int free_extent_hook(struct btrfs_trans_handle *trans,
6006                             struct btrfs_root *root,
6007                             u64 bytenr, u64 num_bytes, u64 parent,
6008                             u64 root_objectid, u64 owner, u64 offset,
6009                             int refs_to_drop)
6010 {
6011         struct extent_record *rec;
6012         struct cache_extent *cache;
6013         int is_data;
6014         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6015
6016         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6017         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6018         if (!cache)
6019                 return 0;
6020
6021         rec = container_of(cache, struct extent_record, cache);
6022         if (is_data) {
6023                 struct data_backref *back;
6024                 back = find_data_backref(rec, parent, root_objectid, owner,
6025                                          offset, 1, bytenr, num_bytes);
6026                 if (!back)
6027                         goto out;
6028                 if (back->node.found_ref) {
6029                         back->found_ref -= refs_to_drop;
6030                         if (rec->refs)
6031                                 rec->refs -= refs_to_drop;
6032                 }
6033                 if (back->node.found_extent_tree) {
6034                         back->num_refs -= refs_to_drop;
6035                         if (rec->extent_item_refs)
6036                                 rec->extent_item_refs -= refs_to_drop;
6037                 }
6038                 if (back->found_ref == 0)
6039                         back->node.found_ref = 0;
6040                 if (back->num_refs == 0)
6041                         back->node.found_extent_tree = 0;
6042
6043                 if (!back->node.found_extent_tree && back->node.found_ref) {
6044                         list_del(&back->node.list);
6045                         free(back);
6046                 }
6047         } else {
6048                 struct tree_backref *back;
6049                 back = find_tree_backref(rec, parent, root_objectid);
6050                 if (!back)
6051                         goto out;
6052                 if (back->node.found_ref) {
6053                         if (rec->refs)
6054                                 rec->refs--;
6055                         back->node.found_ref = 0;
6056                 }
6057                 if (back->node.found_extent_tree) {
6058                         if (rec->extent_item_refs)
6059                                 rec->extent_item_refs--;
6060                         back->node.found_extent_tree = 0;
6061                 }
6062                 if (!back->node.found_extent_tree && back->node.found_ref) {
6063                         list_del(&back->node.list);
6064                         free(back);
6065                 }
6066         }
6067         maybe_free_extent_rec(extent_cache, rec);
6068 out:
6069         return 0;
6070 }
6071
6072 static int delete_extent_records(struct btrfs_trans_handle *trans,
6073                                  struct btrfs_root *root,
6074                                  struct btrfs_path *path,
6075                                  u64 bytenr, u64 new_len)
6076 {
6077         struct btrfs_key key;
6078         struct btrfs_key found_key;
6079         struct extent_buffer *leaf;
6080         int ret;
6081         int slot;
6082
6083
6084         key.objectid = bytenr;
6085         key.type = (u8)-1;
6086         key.offset = (u64)-1;
6087
6088         while(1) {
6089                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6090                                         &key, path, 0, 1);
6091                 if (ret < 0)
6092                         break;
6093
6094                 if (ret > 0) {
6095                         ret = 0;
6096                         if (path->slots[0] == 0)
6097                                 break;
6098                         path->slots[0]--;
6099                 }
6100                 ret = 0;
6101
6102                 leaf = path->nodes[0];
6103                 slot = path->slots[0];
6104
6105                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6106                 if (found_key.objectid != bytenr)
6107                         break;
6108
6109                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6110                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6111                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6112                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6113                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6114                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6115                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6116                         btrfs_release_path(path);
6117                         if (found_key.type == 0) {
6118                                 if (found_key.offset == 0)
6119                                         break;
6120                                 key.offset = found_key.offset - 1;
6121                                 key.type = found_key.type;
6122                         }
6123                         key.type = found_key.type - 1;
6124                         key.offset = (u64)-1;
6125                         continue;
6126                 }
6127
6128                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6129                         found_key.objectid, found_key.type, found_key.offset);
6130
6131                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6132                 if (ret)
6133                         break;
6134                 btrfs_release_path(path);
6135
6136                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6137                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6138                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6139                                 found_key.offset : root->leafsize;
6140
6141                         ret = btrfs_update_block_group(trans, root, bytenr,
6142                                                        bytes, 0, 0);
6143                         if (ret)
6144                                 break;
6145                 }
6146         }
6147
6148         btrfs_release_path(path);
6149         return ret;
6150 }
6151
6152 /*
6153  * for a single backref, this will allocate a new extent
6154  * and add the backref to it.
6155  */
6156 static int record_extent(struct btrfs_trans_handle *trans,
6157                          struct btrfs_fs_info *info,
6158                          struct btrfs_path *path,
6159                          struct extent_record *rec,
6160                          struct extent_backref *back,
6161                          int allocated, u64 flags)
6162 {
6163         int ret;
6164         struct btrfs_root *extent_root = info->extent_root;
6165         struct extent_buffer *leaf;
6166         struct btrfs_key ins_key;
6167         struct btrfs_extent_item *ei;
6168         struct tree_backref *tback;
6169         struct data_backref *dback;
6170         struct btrfs_tree_block_info *bi;
6171
6172         if (!back->is_data)
6173                 rec->max_size = max_t(u64, rec->max_size,
6174                                     info->extent_root->leafsize);
6175
6176         if (!allocated) {
6177                 u32 item_size = sizeof(*ei);
6178
6179                 if (!back->is_data)
6180                         item_size += sizeof(*bi);
6181
6182                 ins_key.objectid = rec->start;
6183                 ins_key.offset = rec->max_size;
6184                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6185
6186                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6187                                         &ins_key, item_size);
6188                 if (ret)
6189                         goto fail;
6190
6191                 leaf = path->nodes[0];
6192                 ei = btrfs_item_ptr(leaf, path->slots[0],
6193                                     struct btrfs_extent_item);
6194
6195                 btrfs_set_extent_refs(leaf, ei, 0);
6196                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6197
6198                 if (back->is_data) {
6199                         btrfs_set_extent_flags(leaf, ei,
6200                                                BTRFS_EXTENT_FLAG_DATA);
6201                 } else {
6202                         struct btrfs_disk_key copy_key;;
6203
6204                         tback = (struct tree_backref *)back;
6205                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6206                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6207                                              sizeof(*bi));
6208
6209                         btrfs_set_disk_key_objectid(&copy_key,
6210                                                     rec->info_objectid);
6211                         btrfs_set_disk_key_type(&copy_key, 0);
6212                         btrfs_set_disk_key_offset(&copy_key, 0);
6213
6214                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6215                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6216
6217                         btrfs_set_extent_flags(leaf, ei,
6218                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6219                 }
6220
6221                 btrfs_mark_buffer_dirty(leaf);
6222                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6223                                                rec->max_size, 1, 0);
6224                 if (ret)
6225                         goto fail;
6226                 btrfs_release_path(path);
6227         }
6228
6229         if (back->is_data) {
6230                 u64 parent;
6231                 int i;
6232
6233                 dback = (struct data_backref *)back;
6234                 if (back->full_backref)
6235                         parent = dback->parent;
6236                 else
6237                         parent = 0;
6238
6239                 for (i = 0; i < dback->found_ref; i++) {
6240                         /* if parent != 0, we're doing a full backref
6241                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6242                          * just makes the backref allocator create a data
6243                          * backref
6244                          */
6245                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6246                                                    rec->start, rec->max_size,
6247                                                    parent,
6248                                                    dback->root,
6249                                                    parent ?
6250                                                    BTRFS_FIRST_FREE_OBJECTID :
6251                                                    dback->owner,
6252                                                    dback->offset);
6253                         if (ret)
6254                                 break;
6255                 }
6256                 fprintf(stderr, "adding new data backref"
6257                                 " on %llu %s %llu owner %llu"
6258                                 " offset %llu found %d\n",
6259                                 (unsigned long long)rec->start,
6260                                 back->full_backref ?
6261                                 "parent" : "root",
6262                                 back->full_backref ?
6263                                 (unsigned long long)parent :
6264                                 (unsigned long long)dback->root,
6265                                 (unsigned long long)dback->owner,
6266                                 (unsigned long long)dback->offset,
6267                                 dback->found_ref);
6268         } else {
6269                 u64 parent;
6270
6271                 tback = (struct tree_backref *)back;
6272                 if (back->full_backref)
6273                         parent = tback->parent;
6274                 else
6275                         parent = 0;
6276
6277                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6278                                            rec->start, rec->max_size,
6279                                            parent, tback->root, 0, 0);
6280                 fprintf(stderr, "adding new tree backref on "
6281                         "start %llu len %llu parent %llu root %llu\n",
6282                         rec->start, rec->max_size, parent, tback->root);
6283         }
6284         if (ret)
6285                 goto fail;
6286 fail:
6287         btrfs_release_path(path);
6288         return ret;
6289 }
6290
6291 struct extent_entry {
6292         u64 bytenr;
6293         u64 bytes;
6294         int count;
6295         int broken;
6296         struct list_head list;
6297 };
6298
6299 static struct extent_entry *find_entry(struct list_head *entries,
6300                                        u64 bytenr, u64 bytes)
6301 {
6302         struct extent_entry *entry = NULL;
6303
6304         list_for_each_entry(entry, entries, list) {
6305                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6306                         return entry;
6307         }
6308
6309         return NULL;
6310 }
6311
6312 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6313 {
6314         struct extent_entry *entry, *best = NULL, *prev = NULL;
6315
6316         list_for_each_entry(entry, entries, list) {
6317                 if (!prev) {
6318                         prev = entry;
6319                         continue;
6320                 }
6321
6322                 /*
6323                  * If there are as many broken entries as entries then we know
6324                  * not to trust this particular entry.
6325                  */
6326                 if (entry->broken == entry->count)
6327                         continue;
6328
6329                 /*
6330                  * If our current entry == best then we can't be sure our best
6331                  * is really the best, so we need to keep searching.
6332                  */
6333                 if (best && best->count == entry->count) {
6334                         prev = entry;
6335                         best = NULL;
6336                         continue;
6337                 }
6338
6339                 /* Prev == entry, not good enough, have to keep searching */
6340                 if (!prev->broken && prev->count == entry->count)
6341                         continue;
6342
6343                 if (!best)
6344                         best = (prev->count > entry->count) ? prev : entry;
6345                 else if (best->count < entry->count)
6346                         best = entry;
6347                 prev = entry;
6348         }
6349
6350         return best;
6351 }
6352
6353 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6354                       struct data_backref *dback, struct extent_entry *entry)
6355 {
6356         struct btrfs_trans_handle *trans;
6357         struct btrfs_root *root;
6358         struct btrfs_file_extent_item *fi;
6359         struct extent_buffer *leaf;
6360         struct btrfs_key key;
6361         u64 bytenr, bytes;
6362         int ret, err;
6363
6364         key.objectid = dback->root;
6365         key.type = BTRFS_ROOT_ITEM_KEY;
6366         key.offset = (u64)-1;
6367         root = btrfs_read_fs_root(info, &key);
6368         if (IS_ERR(root)) {
6369                 fprintf(stderr, "Couldn't find root for our ref\n");
6370                 return -EINVAL;
6371         }
6372
6373         /*
6374          * The backref points to the original offset of the extent if it was
6375          * split, so we need to search down to the offset we have and then walk
6376          * forward until we find the backref we're looking for.
6377          */
6378         key.objectid = dback->owner;
6379         key.type = BTRFS_EXTENT_DATA_KEY;
6380         key.offset = dback->offset;
6381         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6382         if (ret < 0) {
6383                 fprintf(stderr, "Error looking up ref %d\n", ret);
6384                 return ret;
6385         }
6386
6387         while (1) {
6388                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6389                         ret = btrfs_next_leaf(root, path);
6390                         if (ret) {
6391                                 fprintf(stderr, "Couldn't find our ref, next\n");
6392                                 return -EINVAL;
6393                         }
6394                 }
6395                 leaf = path->nodes[0];
6396                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6397                 if (key.objectid != dback->owner ||
6398                     key.type != BTRFS_EXTENT_DATA_KEY) {
6399                         fprintf(stderr, "Couldn't find our ref, search\n");
6400                         return -EINVAL;
6401                 }
6402                 fi = btrfs_item_ptr(leaf, path->slots[0],
6403                                     struct btrfs_file_extent_item);
6404                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6405                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6406
6407                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6408                         break;
6409                 path->slots[0]++;
6410         }
6411
6412         btrfs_release_path(path);
6413
6414         trans = btrfs_start_transaction(root, 1);
6415         if (IS_ERR(trans))
6416                 return PTR_ERR(trans);
6417
6418         /*
6419          * Ok we have the key of the file extent we want to fix, now we can cow
6420          * down to the thing and fix it.
6421          */
6422         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6423         if (ret < 0) {
6424                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6425                         key.objectid, key.type, key.offset, ret);
6426                 goto out;
6427         }
6428         if (ret > 0) {
6429                 fprintf(stderr, "Well that's odd, we just found this key "
6430                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6431                         key.offset);
6432                 ret = -EINVAL;
6433                 goto out;
6434         }
6435         leaf = path->nodes[0];
6436         fi = btrfs_item_ptr(leaf, path->slots[0],
6437                             struct btrfs_file_extent_item);
6438
6439         if (btrfs_file_extent_compression(leaf, fi) &&
6440             dback->disk_bytenr != entry->bytenr) {
6441                 fprintf(stderr, "Ref doesn't match the record start and is "
6442                         "compressed, please take a btrfs-image of this file "
6443                         "system and send it to a btrfs developer so they can "
6444                         "complete this functionality for bytenr %Lu\n",
6445                         dback->disk_bytenr);
6446                 ret = -EINVAL;
6447                 goto out;
6448         }
6449
6450         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6451                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6452         } else if (dback->disk_bytenr > entry->bytenr) {
6453                 u64 off_diff, offset;
6454
6455                 off_diff = dback->disk_bytenr - entry->bytenr;
6456                 offset = btrfs_file_extent_offset(leaf, fi);
6457                 if (dback->disk_bytenr + offset +
6458                     btrfs_file_extent_num_bytes(leaf, fi) >
6459                     entry->bytenr + entry->bytes) {
6460                         fprintf(stderr, "Ref is past the entry end, please "
6461                                 "take a btrfs-image of this file system and "
6462                                 "send it to a btrfs developer, ref %Lu\n",
6463                                 dback->disk_bytenr);
6464                         ret = -EINVAL;
6465                         goto out;
6466                 }
6467                 offset += off_diff;
6468                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6469                 btrfs_set_file_extent_offset(leaf, fi, offset);
6470         } else if (dback->disk_bytenr < entry->bytenr) {
6471                 u64 offset;
6472
6473                 offset = btrfs_file_extent_offset(leaf, fi);
6474                 if (dback->disk_bytenr + offset < entry->bytenr) {
6475                         fprintf(stderr, "Ref is before the entry start, please"
6476                                 " take a btrfs-image of this file system and "
6477                                 "send it to a btrfs developer, ref %Lu\n",
6478                                 dback->disk_bytenr);
6479                         ret = -EINVAL;
6480                         goto out;
6481                 }
6482
6483                 offset += dback->disk_bytenr;
6484                 offset -= entry->bytenr;
6485                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6486                 btrfs_set_file_extent_offset(leaf, fi, offset);
6487         }
6488
6489         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6490
6491         /*
6492          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6493          * only do this if we aren't using compression, otherwise it's a
6494          * trickier case.
6495          */
6496         if (!btrfs_file_extent_compression(leaf, fi))
6497                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6498         else
6499                 printf("ram bytes may be wrong?\n");
6500         btrfs_mark_buffer_dirty(leaf);
6501 out:
6502         err = btrfs_commit_transaction(trans, root);
6503         btrfs_release_path(path);
6504         return ret ? ret : err;
6505 }
6506
6507 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6508                            struct extent_record *rec)
6509 {
6510         struct extent_backref *back;
6511         struct data_backref *dback;
6512         struct extent_entry *entry, *best = NULL;
6513         LIST_HEAD(entries);
6514         int nr_entries = 0;
6515         int broken_entries = 0;
6516         int ret = 0;
6517         short mismatch = 0;
6518
6519         /*
6520          * Metadata is easy and the backrefs should always agree on bytenr and
6521          * size, if not we've got bigger issues.
6522          */
6523         if (rec->metadata)
6524                 return 0;
6525
6526         list_for_each_entry(back, &rec->backrefs, list) {
6527                 if (back->full_backref || !back->is_data)
6528                         continue;
6529
6530                 dback = (struct data_backref *)back;
6531
6532                 /*
6533                  * We only pay attention to backrefs that we found a real
6534                  * backref for.
6535                  */
6536                 if (dback->found_ref == 0)
6537                         continue;
6538
6539                 /*
6540                  * For now we only catch when the bytes don't match, not the
6541                  * bytenr.  We can easily do this at the same time, but I want
6542                  * to have a fs image to test on before we just add repair
6543                  * functionality willy-nilly so we know we won't screw up the
6544                  * repair.
6545                  */
6546
6547                 entry = find_entry(&entries, dback->disk_bytenr,
6548                                    dback->bytes);
6549                 if (!entry) {
6550                         entry = malloc(sizeof(struct extent_entry));
6551                         if (!entry) {
6552                                 ret = -ENOMEM;
6553                                 goto out;
6554                         }
6555                         memset(entry, 0, sizeof(*entry));
6556                         entry->bytenr = dback->disk_bytenr;
6557                         entry->bytes = dback->bytes;
6558                         list_add_tail(&entry->list, &entries);
6559                         nr_entries++;
6560                 }
6561
6562                 /*
6563                  * If we only have on entry we may think the entries agree when
6564                  * in reality they don't so we have to do some extra checking.
6565                  */
6566                 if (dback->disk_bytenr != rec->start ||
6567                     dback->bytes != rec->nr || back->broken)
6568                         mismatch = 1;
6569
6570                 if (back->broken) {
6571                         entry->broken++;
6572                         broken_entries++;
6573                 }
6574
6575                 entry->count++;
6576         }
6577
6578         /* Yay all the backrefs agree, carry on good sir */
6579         if (nr_entries <= 1 && !mismatch)
6580                 goto out;
6581
6582         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6583                 "%Lu\n", rec->start);
6584
6585         /*
6586          * First we want to see if the backrefs can agree amongst themselves who
6587          * is right, so figure out which one of the entries has the highest
6588          * count.
6589          */
6590         best = find_most_right_entry(&entries);
6591
6592         /*
6593          * Ok so we may have an even split between what the backrefs think, so
6594          * this is where we use the extent ref to see what it thinks.
6595          */
6596         if (!best) {
6597                 entry = find_entry(&entries, rec->start, rec->nr);
6598                 if (!entry && (!broken_entries || !rec->found_rec)) {
6599                         fprintf(stderr, "Backrefs don't agree with each other "
6600                                 "and extent record doesn't agree with anybody,"
6601                                 " so we can't fix bytenr %Lu bytes %Lu\n",
6602                                 rec->start, rec->nr);
6603                         ret = -EINVAL;
6604                         goto out;
6605                 } else if (!entry) {
6606                         /*
6607                          * Ok our backrefs were broken, we'll assume this is the
6608                          * correct value and add an entry for this range.
6609                          */
6610                         entry = malloc(sizeof(struct extent_entry));
6611                         if (!entry) {
6612                                 ret = -ENOMEM;
6613                                 goto out;
6614                         }
6615                         memset(entry, 0, sizeof(*entry));
6616                         entry->bytenr = rec->start;
6617                         entry->bytes = rec->nr;
6618                         list_add_tail(&entry->list, &entries);
6619                         nr_entries++;
6620                 }
6621                 entry->count++;
6622                 best = find_most_right_entry(&entries);
6623                 if (!best) {
6624                         fprintf(stderr, "Backrefs and extent record evenly "
6625                                 "split on who is right, this is going to "
6626                                 "require user input to fix bytenr %Lu bytes "
6627                                 "%Lu\n", rec->start, rec->nr);
6628                         ret = -EINVAL;
6629                         goto out;
6630                 }
6631         }
6632
6633         /*
6634          * I don't think this can happen currently as we'll abort() if we catch
6635          * this case higher up, but in case somebody removes that we still can't
6636          * deal with it properly here yet, so just bail out of that's the case.
6637          */
6638         if (best->bytenr != rec->start) {
6639                 fprintf(stderr, "Extent start and backref starts don't match, "
6640                         "please use btrfs-image on this file system and send "
6641                         "it to a btrfs developer so they can make fsck fix "
6642                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
6643                         rec->start, rec->nr);
6644                 ret = -EINVAL;
6645                 goto out;
6646         }
6647
6648         /*
6649          * Ok great we all agreed on an extent record, let's go find the real
6650          * references and fix up the ones that don't match.
6651          */
6652         list_for_each_entry(back, &rec->backrefs, list) {
6653                 if (back->full_backref || !back->is_data)
6654                         continue;
6655
6656                 dback = (struct data_backref *)back;
6657
6658                 /*
6659                  * Still ignoring backrefs that don't have a real ref attached
6660                  * to them.
6661                  */
6662                 if (dback->found_ref == 0)
6663                         continue;
6664
6665                 if (dback->bytes == best->bytes &&
6666                     dback->disk_bytenr == best->bytenr)
6667                         continue;
6668
6669                 ret = repair_ref(info, path, dback, best);
6670                 if (ret)
6671                         goto out;
6672         }
6673
6674         /*
6675          * Ok we messed with the actual refs, which means we need to drop our
6676          * entire cache and go back and rescan.  I know this is a huge pain and
6677          * adds a lot of extra work, but it's the only way to be safe.  Once all
6678          * the backrefs agree we may not need to do anything to the extent
6679          * record itself.
6680          */
6681         ret = -EAGAIN;
6682 out:
6683         while (!list_empty(&entries)) {
6684                 entry = list_entry(entries.next, struct extent_entry, list);
6685                 list_del_init(&entry->list);
6686                 free(entry);
6687         }
6688         return ret;
6689 }
6690
6691 static int process_duplicates(struct btrfs_root *root,
6692                               struct cache_tree *extent_cache,
6693                               struct extent_record *rec)
6694 {
6695         struct extent_record *good, *tmp;
6696         struct cache_extent *cache;
6697         int ret;
6698
6699         /*
6700          * If we found a extent record for this extent then return, or if we
6701          * have more than one duplicate we are likely going to need to delete
6702          * something.
6703          */
6704         if (rec->found_rec || rec->num_duplicates > 1)
6705                 return 0;
6706
6707         /* Shouldn't happen but just in case */
6708         BUG_ON(!rec->num_duplicates);
6709
6710         /*
6711          * So this happens if we end up with a backref that doesn't match the
6712          * actual extent entry.  So either the backref is bad or the extent
6713          * entry is bad.  Either way we want to have the extent_record actually
6714          * reflect what we found in the extent_tree, so we need to take the
6715          * duplicate out and use that as the extent_record since the only way we
6716          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6717          */
6718         remove_cache_extent(extent_cache, &rec->cache);
6719
6720         good = list_entry(rec->dups.next, struct extent_record, list);
6721         list_del_init(&good->list);
6722         INIT_LIST_HEAD(&good->backrefs);
6723         INIT_LIST_HEAD(&good->dups);
6724         good->cache.start = good->start;
6725         good->cache.size = good->nr;
6726         good->content_checked = 0;
6727         good->owner_ref_checked = 0;
6728         good->num_duplicates = 0;
6729         good->refs = rec->refs;
6730         list_splice_init(&rec->backrefs, &good->backrefs);
6731         while (1) {
6732                 cache = lookup_cache_extent(extent_cache, good->start,
6733                                             good->nr);
6734                 if (!cache)
6735                         break;
6736                 tmp = container_of(cache, struct extent_record, cache);
6737
6738                 /*
6739                  * If we find another overlapping extent and it's found_rec is
6740                  * set then it's a duplicate and we need to try and delete
6741                  * something.
6742                  */
6743                 if (tmp->found_rec || tmp->num_duplicates > 0) {
6744                         if (list_empty(&good->list))
6745                                 list_add_tail(&good->list,
6746                                               &duplicate_extents);
6747                         good->num_duplicates += tmp->num_duplicates + 1;
6748                         list_splice_init(&tmp->dups, &good->dups);
6749                         list_del_init(&tmp->list);
6750                         list_add_tail(&tmp->list, &good->dups);
6751                         remove_cache_extent(extent_cache, &tmp->cache);
6752                         continue;
6753                 }
6754
6755                 /*
6756                  * Ok we have another non extent item backed extent rec, so lets
6757                  * just add it to this extent and carry on like we did above.
6758                  */
6759                 good->refs += tmp->refs;
6760                 list_splice_init(&tmp->backrefs, &good->backrefs);
6761                 remove_cache_extent(extent_cache, &tmp->cache);
6762                 free(tmp);
6763         }
6764         ret = insert_cache_extent(extent_cache, &good->cache);
6765         BUG_ON(ret);
6766         free(rec);
6767         return good->num_duplicates ? 0 : 1;
6768 }
6769
6770 static int delete_duplicate_records(struct btrfs_root *root,
6771                                     struct extent_record *rec)
6772 {
6773         struct btrfs_trans_handle *trans;
6774         LIST_HEAD(delete_list);
6775         struct btrfs_path *path;
6776         struct extent_record *tmp, *good, *n;
6777         int nr_del = 0;
6778         int ret = 0, err;
6779         struct btrfs_key key;
6780
6781         path = btrfs_alloc_path();
6782         if (!path) {
6783                 ret = -ENOMEM;
6784                 goto out;
6785         }
6786
6787         good = rec;
6788         /* Find the record that covers all of the duplicates. */
6789         list_for_each_entry(tmp, &rec->dups, list) {
6790                 if (good->start < tmp->start)
6791                         continue;
6792                 if (good->nr > tmp->nr)
6793                         continue;
6794
6795                 if (tmp->start + tmp->nr < good->start + good->nr) {
6796                         fprintf(stderr, "Ok we have overlapping extents that "
6797                                 "aren't completely covered by eachother, this "
6798                                 "is going to require more careful thought.  "
6799                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
6800                                 tmp->start, tmp->nr, good->start, good->nr);
6801                         abort();
6802                 }
6803                 good = tmp;
6804         }
6805
6806         if (good != rec)
6807                 list_add_tail(&rec->list, &delete_list);
6808
6809         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
6810                 if (tmp == good)
6811                         continue;
6812                 list_move_tail(&tmp->list, &delete_list);
6813         }
6814
6815         root = root->fs_info->extent_root;
6816         trans = btrfs_start_transaction(root, 1);
6817         if (IS_ERR(trans)) {
6818                 ret = PTR_ERR(trans);
6819                 goto out;
6820         }
6821
6822         list_for_each_entry(tmp, &delete_list, list) {
6823                 if (tmp->found_rec == 0)
6824                         continue;
6825                 key.objectid = tmp->start;
6826                 key.type = BTRFS_EXTENT_ITEM_KEY;
6827                 key.offset = tmp->nr;
6828
6829                 /* Shouldn't happen but just in case */
6830                 if (tmp->metadata) {
6831                         fprintf(stderr, "Well this shouldn't happen, extent "
6832                                 "record overlaps but is metadata? "
6833                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
6834                         abort();
6835                 }
6836
6837                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6838                 if (ret) {
6839                         if (ret > 0)
6840                                 ret = -EINVAL;
6841                         break;
6842                 }
6843                 ret = btrfs_del_item(trans, root, path);
6844                 if (ret)
6845                         break;
6846                 btrfs_release_path(path);
6847                 nr_del++;
6848         }
6849         err = btrfs_commit_transaction(trans, root);
6850         if (err && !ret)
6851                 ret = err;
6852 out:
6853         while (!list_empty(&delete_list)) {
6854                 tmp = list_entry(delete_list.next, struct extent_record, list);
6855                 list_del_init(&tmp->list);
6856                 if (tmp == rec)
6857                         continue;
6858                 free(tmp);
6859         }
6860
6861         while (!list_empty(&rec->dups)) {
6862                 tmp = list_entry(rec->dups.next, struct extent_record, list);
6863                 list_del_init(&tmp->list);
6864                 free(tmp);
6865         }
6866
6867         btrfs_free_path(path);
6868
6869         if (!ret && !nr_del)
6870                 rec->num_duplicates = 0;
6871
6872         return ret ? ret : nr_del;
6873 }
6874
6875 static int find_possible_backrefs(struct btrfs_fs_info *info,
6876                                   struct btrfs_path *path,
6877                                   struct cache_tree *extent_cache,
6878                                   struct extent_record *rec)
6879 {
6880         struct btrfs_root *root;
6881         struct extent_backref *back;
6882         struct data_backref *dback;
6883         struct cache_extent *cache;
6884         struct btrfs_file_extent_item *fi;
6885         struct btrfs_key key;
6886         u64 bytenr, bytes;
6887         int ret;
6888
6889         list_for_each_entry(back, &rec->backrefs, list) {
6890                 /* Don't care about full backrefs (poor unloved backrefs) */
6891                 if (back->full_backref || !back->is_data)
6892                         continue;
6893
6894                 dback = (struct data_backref *)back;
6895
6896                 /* We found this one, we don't need to do a lookup */
6897                 if (dback->found_ref)
6898                         continue;
6899
6900                 key.objectid = dback->root;
6901                 key.type = BTRFS_ROOT_ITEM_KEY;
6902                 key.offset = (u64)-1;
6903
6904                 root = btrfs_read_fs_root(info, &key);
6905
6906                 /* No root, definitely a bad ref, skip */
6907                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
6908                         continue;
6909                 /* Other err, exit */
6910                 if (IS_ERR(root))
6911                         return PTR_ERR(root);
6912
6913                 key.objectid = dback->owner;
6914                 key.type = BTRFS_EXTENT_DATA_KEY;
6915                 key.offset = dback->offset;
6916                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6917                 if (ret) {
6918                         btrfs_release_path(path);
6919                         if (ret < 0)
6920                                 return ret;
6921                         /* Didn't find it, we can carry on */
6922                         ret = 0;
6923                         continue;
6924                 }
6925
6926                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
6927                                     struct btrfs_file_extent_item);
6928                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
6929                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
6930                 btrfs_release_path(path);
6931                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6932                 if (cache) {
6933                         struct extent_record *tmp;
6934                         tmp = container_of(cache, struct extent_record, cache);
6935
6936                         /*
6937                          * If we found an extent record for the bytenr for this
6938                          * particular backref then we can't add it to our
6939                          * current extent record.  We only want to add backrefs
6940                          * that don't have a corresponding extent item in the
6941                          * extent tree since they likely belong to this record
6942                          * and we need to fix it if it doesn't match bytenrs.
6943                          */
6944                         if  (tmp->found_rec)
6945                                 continue;
6946                 }
6947
6948                 dback->found_ref += 1;
6949                 dback->disk_bytenr = bytenr;
6950                 dback->bytes = bytes;
6951
6952                 /*
6953                  * Set this so the verify backref code knows not to trust the
6954                  * values in this backref.
6955                  */
6956                 back->broken = 1;
6957         }
6958
6959         return 0;
6960 }
6961
6962 /*
6963  * Record orphan data ref into corresponding root.
6964  *
6965  * Return 0 if the extent item contains data ref and recorded.
6966  * Return 1 if the extent item contains no useful data ref
6967  *   On that case, it may contains only shared_dataref or metadata backref
6968  *   or the file extent exists(this should be handled by the extent bytenr
6969  *   recovery routine)
6970  * Return <0 if something goes wrong.
6971  */
6972 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
6973                                       struct extent_record *rec)
6974 {
6975         struct btrfs_key key;
6976         struct btrfs_root *dest_root;
6977         struct extent_backref *back;
6978         struct data_backref *dback;
6979         struct orphan_data_extent *orphan;
6980         struct btrfs_path *path;
6981         int recorded_data_ref = 0;
6982         int ret = 0;
6983
6984         if (rec->metadata)
6985                 return 1;
6986         path = btrfs_alloc_path();
6987         if (!path)
6988                 return -ENOMEM;
6989         list_for_each_entry(back, &rec->backrefs, list) {
6990                 if (back->full_backref || !back->is_data ||
6991                     !back->found_extent_tree)
6992                         continue;
6993                 dback = (struct data_backref *)back;
6994                 if (dback->found_ref)
6995                         continue;
6996                 key.objectid = dback->root;
6997                 key.type = BTRFS_ROOT_ITEM_KEY;
6998                 key.offset = (u64)-1;
6999
7000                 dest_root = btrfs_read_fs_root(fs_info, &key);
7001
7002                 /* For non-exist root we just skip it */
7003                 if (IS_ERR(dest_root) || !dest_root)
7004                         continue;
7005
7006                 key.objectid = dback->owner;
7007                 key.type = BTRFS_EXTENT_DATA_KEY;
7008                 key.offset = dback->offset;
7009
7010                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7011                 /*
7012                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7013                  * we need to record it for inode/file extent rebuild.
7014                  * For ret > 0, we record it only for file extent rebuild.
7015                  * For ret == 0, the file extent exists but only bytenr
7016                  * mismatch, let the original bytenr fix routine to handle,
7017                  * don't record it.
7018                  */
7019                 if (ret == 0)
7020                         continue;
7021                 ret = 0;
7022                 orphan = malloc(sizeof(*orphan));
7023                 if (!orphan) {
7024                         ret = -ENOMEM;
7025                         goto out;
7026                 }
7027                 INIT_LIST_HEAD(&orphan->list);
7028                 orphan->root = dback->root;
7029                 orphan->objectid = dback->owner;
7030                 orphan->offset = dback->offset;
7031                 orphan->disk_bytenr = rec->cache.start;
7032                 orphan->disk_len = rec->cache.size;
7033                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7034                 recorded_data_ref = 1;
7035         }
7036 out:
7037         btrfs_free_path(path);
7038         if (!ret)
7039                 return !recorded_data_ref;
7040         else
7041                 return ret;
7042 }
7043
7044 /*
7045  * when an incorrect extent item is found, this will delete
7046  * all of the existing entries for it and recreate them
7047  * based on what the tree scan found.
7048  */
7049 static int fixup_extent_refs(struct btrfs_fs_info *info,
7050                              struct cache_tree *extent_cache,
7051                              struct extent_record *rec)
7052 {
7053         struct btrfs_trans_handle *trans = NULL;
7054         int ret;
7055         struct btrfs_path *path;
7056         struct list_head *cur = rec->backrefs.next;
7057         struct cache_extent *cache;
7058         struct extent_backref *back;
7059         int allocated = 0;
7060         u64 flags = 0;
7061
7062         if (rec->flag_block_full_backref)
7063                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7064
7065         path = btrfs_alloc_path();
7066         if (!path)
7067                 return -ENOMEM;
7068
7069         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7070                 /*
7071                  * Sometimes the backrefs themselves are so broken they don't
7072                  * get attached to any meaningful rec, so first go back and
7073                  * check any of our backrefs that we couldn't find and throw
7074                  * them into the list if we find the backref so that
7075                  * verify_backrefs can figure out what to do.
7076                  */
7077                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7078                 if (ret < 0)
7079                         goto out;
7080         }
7081
7082         /* step one, make sure all of the backrefs agree */
7083         ret = verify_backrefs(info, path, rec);
7084         if (ret < 0)
7085                 goto out;
7086
7087         trans = btrfs_start_transaction(info->extent_root, 1);
7088         if (IS_ERR(trans)) {
7089                 ret = PTR_ERR(trans);
7090                 goto out;
7091         }
7092
7093         /* step two, delete all the existing records */
7094         ret = delete_extent_records(trans, info->extent_root, path,
7095                                     rec->start, rec->max_size);
7096
7097         if (ret < 0)
7098                 goto out;
7099
7100         /* was this block corrupt?  If so, don't add references to it */
7101         cache = lookup_cache_extent(info->corrupt_blocks,
7102                                     rec->start, rec->max_size);
7103         if (cache) {
7104                 ret = 0;
7105                 goto out;
7106         }
7107
7108         /* step three, recreate all the refs we did find */
7109         while(cur != &rec->backrefs) {
7110                 back = list_entry(cur, struct extent_backref, list);
7111                 cur = cur->next;
7112
7113                 /*
7114                  * if we didn't find any references, don't create a
7115                  * new extent record
7116                  */
7117                 if (!back->found_ref)
7118                         continue;
7119
7120                 rec->bad_full_backref = 0;
7121                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7122                 allocated = 1;
7123
7124                 if (ret)
7125                         goto out;
7126         }
7127 out:
7128         if (trans) {
7129                 int err = btrfs_commit_transaction(trans, info->extent_root);
7130                 if (!ret)
7131                         ret = err;
7132         }
7133
7134         btrfs_free_path(path);
7135         return ret;
7136 }
7137
7138 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7139                               struct extent_record *rec)
7140 {
7141         struct btrfs_trans_handle *trans;
7142         struct btrfs_root *root = fs_info->extent_root;
7143         struct btrfs_path *path;
7144         struct btrfs_extent_item *ei;
7145         struct btrfs_key key;
7146         u64 flags;
7147         int ret = 0;
7148
7149         key.objectid = rec->start;
7150         if (rec->metadata) {
7151                 key.type = BTRFS_METADATA_ITEM_KEY;
7152                 key.offset = rec->info_level;
7153         } else {
7154                 key.type = BTRFS_EXTENT_ITEM_KEY;
7155                 key.offset = rec->max_size;
7156         }
7157
7158         path = btrfs_alloc_path();
7159         if (!path)
7160                 return -ENOMEM;
7161
7162         trans = btrfs_start_transaction(root, 0);
7163         if (IS_ERR(trans)) {
7164                 btrfs_free_path(path);
7165                 return PTR_ERR(trans);
7166         }
7167
7168         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7169         if (ret < 0) {
7170                 btrfs_free_path(path);
7171                 btrfs_commit_transaction(trans, root);
7172                 return ret;
7173         } else if (ret) {
7174                 fprintf(stderr, "Didn't find extent for %llu\n",
7175                         (unsigned long long)rec->start);
7176                 btrfs_free_path(path);
7177                 btrfs_commit_transaction(trans, root);
7178                 return -ENOENT;
7179         }
7180
7181         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7182                             struct btrfs_extent_item);
7183         flags = btrfs_extent_flags(path->nodes[0], ei);
7184         if (rec->flag_block_full_backref) {
7185                 fprintf(stderr, "setting full backref on %llu\n",
7186                         (unsigned long long)key.objectid);
7187                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7188         } else {
7189                 fprintf(stderr, "clearing full backref on %llu\n",
7190                         (unsigned long long)key.objectid);
7191                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7192         }
7193         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7194         btrfs_mark_buffer_dirty(path->nodes[0]);
7195         btrfs_free_path(path);
7196         return btrfs_commit_transaction(trans, root);
7197 }
7198
7199 /* right now we only prune from the extent allocation tree */
7200 static int prune_one_block(struct btrfs_trans_handle *trans,
7201                            struct btrfs_fs_info *info,
7202                            struct btrfs_corrupt_block *corrupt)
7203 {
7204         int ret;
7205         struct btrfs_path path;
7206         struct extent_buffer *eb;
7207         u64 found;
7208         int slot;
7209         int nritems;
7210         int level = corrupt->level + 1;
7211
7212         btrfs_init_path(&path);
7213 again:
7214         /* we want to stop at the parent to our busted block */
7215         path.lowest_level = level;
7216
7217         ret = btrfs_search_slot(trans, info->extent_root,
7218                                 &corrupt->key, &path, -1, 1);
7219
7220         if (ret < 0)
7221                 goto out;
7222
7223         eb = path.nodes[level];
7224         if (!eb) {
7225                 ret = -ENOENT;
7226                 goto out;
7227         }
7228
7229         /*
7230          * hopefully the search gave us the block we want to prune,
7231          * lets try that first
7232          */
7233         slot = path.slots[level];
7234         found =  btrfs_node_blockptr(eb, slot);
7235         if (found == corrupt->cache.start)
7236                 goto del_ptr;
7237
7238         nritems = btrfs_header_nritems(eb);
7239
7240         /* the search failed, lets scan this node and hope we find it */
7241         for (slot = 0; slot < nritems; slot++) {
7242                 found =  btrfs_node_blockptr(eb, slot);
7243                 if (found == corrupt->cache.start)
7244                         goto del_ptr;
7245         }
7246         /*
7247          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7248          * to this block
7249          */
7250         if (eb == info->extent_root->node) {
7251                 ret = -ENOENT;
7252                 goto out;
7253         } else {
7254                 level++;
7255                 btrfs_release_path(&path);
7256                 goto again;
7257         }
7258
7259 del_ptr:
7260         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7261         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7262
7263 out:
7264         btrfs_release_path(&path);
7265         return ret;
7266 }
7267
7268 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7269 {
7270         struct btrfs_trans_handle *trans = NULL;
7271         struct cache_extent *cache;
7272         struct btrfs_corrupt_block *corrupt;
7273
7274         while (1) {
7275                 cache = search_cache_extent(info->corrupt_blocks, 0);
7276                 if (!cache)
7277                         break;
7278                 if (!trans) {
7279                         trans = btrfs_start_transaction(info->extent_root, 1);
7280                         if (IS_ERR(trans))
7281                                 return PTR_ERR(trans);
7282                 }
7283                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7284                 prune_one_block(trans, info, corrupt);
7285                 remove_cache_extent(info->corrupt_blocks, cache);
7286         }
7287         if (trans)
7288                 return btrfs_commit_transaction(trans, info->extent_root);
7289         return 0;
7290 }
7291
7292 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7293 {
7294         struct btrfs_block_group_cache *cache;
7295         u64 start, end;
7296         int ret;
7297
7298         while (1) {
7299                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7300                                             &start, &end, EXTENT_DIRTY);
7301                 if (ret)
7302                         break;
7303                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7304                                    GFP_NOFS);
7305         }
7306
7307         start = 0;
7308         while (1) {
7309                 cache = btrfs_lookup_first_block_group(fs_info, start);
7310                 if (!cache)
7311                         break;
7312                 if (cache->cached)
7313                         cache->cached = 0;
7314                 start = cache->key.objectid + cache->key.offset;
7315         }
7316 }
7317
7318 static int check_extent_refs(struct btrfs_root *root,
7319                              struct cache_tree *extent_cache)
7320 {
7321         struct extent_record *rec;
7322         struct cache_extent *cache;
7323         int err = 0;
7324         int ret = 0;
7325         int fixed = 0;
7326         int had_dups = 0;
7327         int recorded = 0;
7328
7329         if (repair) {
7330                 /*
7331                  * if we're doing a repair, we have to make sure
7332                  * we don't allocate from the problem extents.
7333                  * In the worst case, this will be all the
7334                  * extents in the FS
7335                  */
7336                 cache = search_cache_extent(extent_cache, 0);
7337                 while(cache) {
7338                         rec = container_of(cache, struct extent_record, cache);
7339                         set_extent_dirty(root->fs_info->excluded_extents,
7340                                          rec->start,
7341                                          rec->start + rec->max_size - 1,
7342                                          GFP_NOFS);
7343                         cache = next_cache_extent(cache);
7344                 }
7345
7346                 /* pin down all the corrupted blocks too */
7347                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7348                 while(cache) {
7349                         set_extent_dirty(root->fs_info->excluded_extents,
7350                                          cache->start,
7351                                          cache->start + cache->size - 1,
7352                                          GFP_NOFS);
7353                         cache = next_cache_extent(cache);
7354                 }
7355                 prune_corrupt_blocks(root->fs_info);
7356                 reset_cached_block_groups(root->fs_info);
7357         }
7358
7359         reset_cached_block_groups(root->fs_info);
7360
7361         /*
7362          * We need to delete any duplicate entries we find first otherwise we
7363          * could mess up the extent tree when we have backrefs that actually
7364          * belong to a different extent item and not the weird duplicate one.
7365          */
7366         while (repair && !list_empty(&duplicate_extents)) {
7367                 rec = list_entry(duplicate_extents.next, struct extent_record,
7368                                  list);
7369                 list_del_init(&rec->list);
7370
7371                 /* Sometimes we can find a backref before we find an actual
7372                  * extent, so we need to process it a little bit to see if there
7373                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7374                  * if this is a backref screwup.  If we need to delete stuff
7375                  * process_duplicates() will return 0, otherwise it will return
7376                  * 1 and we
7377                  */
7378                 if (process_duplicates(root, extent_cache, rec))
7379                         continue;
7380                 ret = delete_duplicate_records(root, rec);
7381                 if (ret < 0)
7382                         return ret;
7383                 /*
7384                  * delete_duplicate_records will return the number of entries
7385                  * deleted, so if it's greater than 0 then we know we actually
7386                  * did something and we need to remove.
7387                  */
7388                 if (ret)
7389                         had_dups = 1;
7390         }
7391
7392         if (had_dups)
7393                 return -EAGAIN;
7394
7395         while(1) {
7396                 int cur_err = 0;
7397
7398                 fixed = 0;
7399                 recorded = 0;
7400                 cache = search_cache_extent(extent_cache, 0);
7401                 if (!cache)
7402                         break;
7403                 rec = container_of(cache, struct extent_record, cache);
7404                 if (rec->num_duplicates) {
7405                         fprintf(stderr, "extent item %llu has multiple extent "
7406                                 "items\n", (unsigned long long)rec->start);
7407                         err = 1;
7408                         cur_err = 1;
7409                 }
7410
7411                 if (rec->refs != rec->extent_item_refs) {
7412                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7413                                 (unsigned long long)rec->start,
7414                                 (unsigned long long)rec->nr);
7415                         fprintf(stderr, "extent item %llu, found %llu\n",
7416                                 (unsigned long long)rec->extent_item_refs,
7417                                 (unsigned long long)rec->refs);
7418                         ret = record_orphan_data_extents(root->fs_info, rec);
7419                         if (ret < 0)
7420                                 goto repair_abort;
7421                         if (ret == 0) {
7422                                 recorded = 1;
7423                         } else {
7424                                 /*
7425                                  * we can't use the extent to repair file
7426                                  * extent, let the fallback method handle it.
7427                                  */
7428                                 if (!fixed && repair) {
7429                                         ret = fixup_extent_refs(
7430                                                         root->fs_info,
7431                                                         extent_cache, rec);
7432                                         if (ret)
7433                                                 goto repair_abort;
7434                                         fixed = 1;
7435                                 }
7436                         }
7437                         err = 1;
7438                         cur_err = 1;
7439                 }
7440                 if (all_backpointers_checked(rec, 1)) {
7441                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7442                                 (unsigned long long)rec->start,
7443                                 (unsigned long long)rec->nr);
7444
7445                         if (!fixed && !recorded && repair) {
7446                                 ret = fixup_extent_refs(root->fs_info,
7447                                                         extent_cache, rec);
7448                                 if (ret)
7449                                         goto repair_abort;
7450                                 fixed = 1;
7451                         }
7452                         cur_err = 1;
7453                         err = 1;
7454                 }
7455                 if (!rec->owner_ref_checked) {
7456                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7457                                 (unsigned long long)rec->start,
7458                                 (unsigned long long)rec->nr);
7459                         if (!fixed && !recorded && repair) {
7460                                 ret = fixup_extent_refs(root->fs_info,
7461                                                         extent_cache, rec);
7462                                 if (ret)
7463                                         goto repair_abort;
7464                                 fixed = 1;
7465                         }
7466                         err = 1;
7467                         cur_err = 1;
7468                 }
7469                 if (rec->bad_full_backref) {
7470                         fprintf(stderr, "bad full backref, on [%llu]\n",
7471                                 (unsigned long long)rec->start);
7472                         if (repair) {
7473                                 ret = fixup_extent_flags(root->fs_info, rec);
7474                                 if (ret)
7475                                         goto repair_abort;
7476                                 fixed = 1;
7477                         }
7478                         err = 1;
7479                         cur_err = 1;
7480                 }
7481
7482                 remove_cache_extent(extent_cache, cache);
7483                 free_all_extent_backrefs(rec);
7484                 if (!init_extent_tree && repair && (!cur_err || fixed))
7485                         clear_extent_dirty(root->fs_info->excluded_extents,
7486                                            rec->start,
7487                                            rec->start + rec->max_size - 1,
7488                                            GFP_NOFS);
7489                 free(rec);
7490         }
7491 repair_abort:
7492         if (repair) {
7493                 if (ret && ret != -EAGAIN) {
7494                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7495                         exit(1);
7496                 } else if (!ret) {
7497                         struct btrfs_trans_handle *trans;
7498
7499                         root = root->fs_info->extent_root;
7500                         trans = btrfs_start_transaction(root, 1);
7501                         if (IS_ERR(trans)) {
7502                                 ret = PTR_ERR(trans);
7503                                 goto repair_abort;
7504                         }
7505
7506                         btrfs_fix_block_accounting(trans, root);
7507                         ret = btrfs_commit_transaction(trans, root);
7508                         if (ret)
7509                                 goto repair_abort;
7510                 }
7511                 if (err)
7512                         fprintf(stderr, "repaired damaged extent references\n");
7513                 return ret;
7514         }
7515         return err;
7516 }
7517
7518 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7519 {
7520         u64 stripe_size;
7521
7522         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7523                 stripe_size = length;
7524                 stripe_size /= num_stripes;
7525         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7526                 stripe_size = length * 2;
7527                 stripe_size /= num_stripes;
7528         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7529                 stripe_size = length;
7530                 stripe_size /= (num_stripes - 1);
7531         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7532                 stripe_size = length;
7533                 stripe_size /= (num_stripes - 2);
7534         } else {
7535                 stripe_size = length;
7536         }
7537         return stripe_size;
7538 }
7539
7540 /*
7541  * Check the chunk with its block group/dev list ref:
7542  * Return 0 if all refs seems valid.
7543  * Return 1 if part of refs seems valid, need later check for rebuild ref
7544  * like missing block group and needs to search extent tree to rebuild them.
7545  * Return -1 if essential refs are missing and unable to rebuild.
7546  */
7547 static int check_chunk_refs(struct chunk_record *chunk_rec,
7548                             struct block_group_tree *block_group_cache,
7549                             struct device_extent_tree *dev_extent_cache,
7550                             int silent)
7551 {
7552         struct cache_extent *block_group_item;
7553         struct block_group_record *block_group_rec;
7554         struct cache_extent *dev_extent_item;
7555         struct device_extent_record *dev_extent_rec;
7556         u64 devid;
7557         u64 offset;
7558         u64 length;
7559         int metadump_v2 = 0;
7560         int i;
7561         int ret = 0;
7562
7563         block_group_item = lookup_cache_extent(&block_group_cache->tree,
7564                                                chunk_rec->offset,
7565                                                chunk_rec->length);
7566         if (block_group_item) {
7567                 block_group_rec = container_of(block_group_item,
7568                                                struct block_group_record,
7569                                                cache);
7570                 if (chunk_rec->length != block_group_rec->offset ||
7571                     chunk_rec->offset != block_group_rec->objectid ||
7572                     (!metadump_v2 &&
7573                      chunk_rec->type_flags != block_group_rec->flags)) {
7574                         if (!silent)
7575                                 fprintf(stderr,
7576                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7577                                         chunk_rec->objectid,
7578                                         chunk_rec->type,
7579                                         chunk_rec->offset,
7580                                         chunk_rec->length,
7581                                         chunk_rec->offset,
7582                                         chunk_rec->type_flags,
7583                                         block_group_rec->objectid,
7584                                         block_group_rec->type,
7585                                         block_group_rec->offset,
7586                                         block_group_rec->offset,
7587                                         block_group_rec->objectid,
7588                                         block_group_rec->flags);
7589                         ret = -1;
7590                 } else {
7591                         list_del_init(&block_group_rec->list);
7592                         chunk_rec->bg_rec = block_group_rec;
7593                 }
7594         } else {
7595                 if (!silent)
7596                         fprintf(stderr,
7597                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7598                                 chunk_rec->objectid,
7599                                 chunk_rec->type,
7600                                 chunk_rec->offset,
7601                                 chunk_rec->length,
7602                                 chunk_rec->offset,
7603                                 chunk_rec->type_flags);
7604                 ret = 1;
7605         }
7606
7607         if (metadump_v2)
7608                 return ret;
7609
7610         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7611                                     chunk_rec->num_stripes);
7612         for (i = 0; i < chunk_rec->num_stripes; ++i) {
7613                 devid = chunk_rec->stripes[i].devid;
7614                 offset = chunk_rec->stripes[i].offset;
7615                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7616                                                        devid, offset, length);
7617                 if (dev_extent_item) {
7618                         dev_extent_rec = container_of(dev_extent_item,
7619                                                 struct device_extent_record,
7620                                                 cache);
7621                         if (dev_extent_rec->objectid != devid ||
7622                             dev_extent_rec->offset != offset ||
7623                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
7624                             dev_extent_rec->length != length) {
7625                                 if (!silent)
7626                                         fprintf(stderr,
7627                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7628                                                 chunk_rec->objectid,
7629                                                 chunk_rec->type,
7630                                                 chunk_rec->offset,
7631                                                 chunk_rec->stripes[i].devid,
7632                                                 chunk_rec->stripes[i].offset,
7633                                                 dev_extent_rec->objectid,
7634                                                 dev_extent_rec->offset,
7635                                                 dev_extent_rec->length);
7636                                 ret = -1;
7637                         } else {
7638                                 list_move(&dev_extent_rec->chunk_list,
7639                                           &chunk_rec->dextents);
7640                         }
7641                 } else {
7642                         if (!silent)
7643                                 fprintf(stderr,
7644                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7645                                         chunk_rec->objectid,
7646                                         chunk_rec->type,
7647                                         chunk_rec->offset,
7648                                         chunk_rec->stripes[i].devid,
7649                                         chunk_rec->stripes[i].offset);
7650                         ret = -1;
7651                 }
7652         }
7653         return ret;
7654 }
7655
7656 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7657 int check_chunks(struct cache_tree *chunk_cache,
7658                  struct block_group_tree *block_group_cache,
7659                  struct device_extent_tree *dev_extent_cache,
7660                  struct list_head *good, struct list_head *bad,
7661                  struct list_head *rebuild, int silent)
7662 {
7663         struct cache_extent *chunk_item;
7664         struct chunk_record *chunk_rec;
7665         struct block_group_record *bg_rec;
7666         struct device_extent_record *dext_rec;
7667         int err;
7668         int ret = 0;
7669
7670         chunk_item = first_cache_extent(chunk_cache);
7671         while (chunk_item) {
7672                 chunk_rec = container_of(chunk_item, struct chunk_record,
7673                                          cache);
7674                 err = check_chunk_refs(chunk_rec, block_group_cache,
7675                                        dev_extent_cache, silent);
7676                 if (err < 0)
7677                         ret = err;
7678                 if (err == 0 && good)
7679                         list_add_tail(&chunk_rec->list, good);
7680                 if (err > 0 && rebuild)
7681                         list_add_tail(&chunk_rec->list, rebuild);
7682                 if (err < 0 && bad)
7683                         list_add_tail(&chunk_rec->list, bad);
7684                 chunk_item = next_cache_extent(chunk_item);
7685         }
7686
7687         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7688                 if (!silent)
7689                         fprintf(stderr,
7690                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7691                                 bg_rec->objectid,
7692                                 bg_rec->offset,
7693                                 bg_rec->flags);
7694                 if (!ret)
7695                         ret = 1;
7696         }
7697
7698         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7699                             chunk_list) {
7700                 if (!silent)
7701                         fprintf(stderr,
7702                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7703                                 dext_rec->objectid,
7704                                 dext_rec->offset,
7705                                 dext_rec->length);
7706                 if (!ret)
7707                         ret = 1;
7708         }
7709         return ret;
7710 }
7711
7712
7713 static int check_device_used(struct device_record *dev_rec,
7714                              struct device_extent_tree *dext_cache)
7715 {
7716         struct cache_extent *cache;
7717         struct device_extent_record *dev_extent_rec;
7718         u64 total_byte = 0;
7719
7720         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7721         while (cache) {
7722                 dev_extent_rec = container_of(cache,
7723                                               struct device_extent_record,
7724                                               cache);
7725                 if (dev_extent_rec->objectid != dev_rec->devid)
7726                         break;
7727
7728                 list_del_init(&dev_extent_rec->device_list);
7729                 total_byte += dev_extent_rec->length;
7730                 cache = next_cache_extent(cache);
7731         }
7732
7733         if (total_byte != dev_rec->byte_used) {
7734                 fprintf(stderr,
7735                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7736                         total_byte, dev_rec->byte_used, dev_rec->objectid,
7737                         dev_rec->type, dev_rec->offset);
7738                 return -1;
7739         } else {
7740                 return 0;
7741         }
7742 }
7743
7744 /* check btrfs_dev_item -> btrfs_dev_extent */
7745 static int check_devices(struct rb_root *dev_cache,
7746                          struct device_extent_tree *dev_extent_cache)
7747 {
7748         struct rb_node *dev_node;
7749         struct device_record *dev_rec;
7750         struct device_extent_record *dext_rec;
7751         int err;
7752         int ret = 0;
7753
7754         dev_node = rb_first(dev_cache);
7755         while (dev_node) {
7756                 dev_rec = container_of(dev_node, struct device_record, node);
7757                 err = check_device_used(dev_rec, dev_extent_cache);
7758                 if (err)
7759                         ret = err;
7760
7761                 dev_node = rb_next(dev_node);
7762         }
7763         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7764                             device_list) {
7765                 fprintf(stderr,
7766                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7767                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
7768                 if (!ret)
7769                         ret = 1;
7770         }
7771         return ret;
7772 }
7773
7774 static int add_root_item_to_list(struct list_head *head,
7775                                   u64 objectid, u64 bytenr, u64 last_snapshot,
7776                                   u8 level, u8 drop_level,
7777                                   int level_size, struct btrfs_key *drop_key)
7778 {
7779
7780         struct root_item_record *ri_rec;
7781         ri_rec = malloc(sizeof(*ri_rec));
7782         if (!ri_rec)
7783                 return -ENOMEM;
7784         ri_rec->bytenr = bytenr;
7785         ri_rec->objectid = objectid;
7786         ri_rec->level = level;
7787         ri_rec->level_size = level_size;
7788         ri_rec->drop_level = drop_level;
7789         ri_rec->last_snapshot = last_snapshot;
7790         if (drop_key)
7791                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
7792         list_add_tail(&ri_rec->list, head);
7793
7794         return 0;
7795 }
7796
7797 static void free_root_item_list(struct list_head *list)
7798 {
7799         struct root_item_record *ri_rec;
7800
7801         while (!list_empty(list)) {
7802                 ri_rec = list_first_entry(list, struct root_item_record,
7803                                           list);
7804                 list_del_init(&ri_rec->list);
7805                 free(ri_rec);
7806         }
7807 }
7808
7809 static int deal_root_from_list(struct list_head *list,
7810                                struct btrfs_root *root,
7811                                struct block_info *bits,
7812                                int bits_nr,
7813                                struct cache_tree *pending,
7814                                struct cache_tree *seen,
7815                                struct cache_tree *reada,
7816                                struct cache_tree *nodes,
7817                                struct cache_tree *extent_cache,
7818                                struct cache_tree *chunk_cache,
7819                                struct rb_root *dev_cache,
7820                                struct block_group_tree *block_group_cache,
7821                                struct device_extent_tree *dev_extent_cache)
7822 {
7823         int ret = 0;
7824         u64 last;
7825
7826         while (!list_empty(list)) {
7827                 struct root_item_record *rec;
7828                 struct extent_buffer *buf;
7829                 rec = list_entry(list->next,
7830                                  struct root_item_record, list);
7831                 last = 0;
7832                 buf = read_tree_block(root->fs_info->tree_root,
7833                                       rec->bytenr, rec->level_size, 0);
7834                 if (!extent_buffer_uptodate(buf)) {
7835                         free_extent_buffer(buf);
7836                         ret = -EIO;
7837                         break;
7838                 }
7839                 add_root_to_pending(buf, extent_cache, pending,
7840                                     seen, nodes, rec->objectid);
7841                 /*
7842                  * To rebuild extent tree, we need deal with snapshot
7843                  * one by one, otherwise we deal with node firstly which
7844                  * can maximize readahead.
7845                  */
7846                 while (1) {
7847                         ret = run_next_block(root, bits, bits_nr, &last,
7848                                              pending, seen, reada, nodes,
7849                                              extent_cache, chunk_cache,
7850                                              dev_cache, block_group_cache,
7851                                              dev_extent_cache, rec);
7852                         if (ret != 0)
7853                                 break;
7854                 }
7855                 free_extent_buffer(buf);
7856                 list_del(&rec->list);
7857                 free(rec);
7858                 if (ret < 0)
7859                         break;
7860         }
7861         while (ret >= 0) {
7862                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
7863                                      reada, nodes, extent_cache, chunk_cache,
7864                                      dev_cache, block_group_cache,
7865                                      dev_extent_cache, NULL);
7866                 if (ret != 0) {
7867                         if (ret > 0)
7868                                 ret = 0;
7869                         break;
7870                 }
7871         }
7872         return ret;
7873 }
7874
7875 static int check_chunks_and_extents(struct btrfs_root *root)
7876 {
7877         struct rb_root dev_cache;
7878         struct cache_tree chunk_cache;
7879         struct block_group_tree block_group_cache;
7880         struct device_extent_tree dev_extent_cache;
7881         struct cache_tree extent_cache;
7882         struct cache_tree seen;
7883         struct cache_tree pending;
7884         struct cache_tree reada;
7885         struct cache_tree nodes;
7886         struct extent_io_tree excluded_extents;
7887         struct cache_tree corrupt_blocks;
7888         struct btrfs_path path;
7889         struct btrfs_key key;
7890         struct btrfs_key found_key;
7891         int ret, err = 0;
7892         struct block_info *bits;
7893         int bits_nr;
7894         struct extent_buffer *leaf;
7895         int slot;
7896         struct btrfs_root_item ri;
7897         struct list_head dropping_trees;
7898         struct list_head normal_trees;
7899         struct btrfs_root *root1;
7900         u64 objectid;
7901         u32 level_size;
7902         u8 level;
7903
7904         dev_cache = RB_ROOT;
7905         cache_tree_init(&chunk_cache);
7906         block_group_tree_init(&block_group_cache);
7907         device_extent_tree_init(&dev_extent_cache);
7908
7909         cache_tree_init(&extent_cache);
7910         cache_tree_init(&seen);
7911         cache_tree_init(&pending);
7912         cache_tree_init(&nodes);
7913         cache_tree_init(&reada);
7914         cache_tree_init(&corrupt_blocks);
7915         extent_io_tree_init(&excluded_extents);
7916         INIT_LIST_HEAD(&dropping_trees);
7917         INIT_LIST_HEAD(&normal_trees);
7918
7919         if (repair) {
7920                 root->fs_info->excluded_extents = &excluded_extents;
7921                 root->fs_info->fsck_extent_cache = &extent_cache;
7922                 root->fs_info->free_extent_hook = free_extent_hook;
7923                 root->fs_info->corrupt_blocks = &corrupt_blocks;
7924         }
7925
7926         bits_nr = 1024;
7927         bits = malloc(bits_nr * sizeof(struct block_info));
7928         if (!bits) {
7929                 perror("malloc");
7930                 exit(1);
7931         }
7932
7933 again:
7934         root1 = root->fs_info->tree_root;
7935         level = btrfs_header_level(root1->node);
7936         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
7937                                     root1->node->start, 0, level, 0,
7938                                     btrfs_level_size(root1, level), NULL);
7939         if (ret < 0)
7940                 goto out;
7941         root1 = root->fs_info->chunk_root;
7942         level = btrfs_header_level(root1->node);
7943         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
7944                                     root1->node->start, 0, level, 0,
7945                                     btrfs_level_size(root1, level), NULL);
7946         if (ret < 0)
7947                 goto out;
7948         btrfs_init_path(&path);
7949         key.offset = 0;
7950         key.objectid = 0;
7951         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
7952         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
7953                                         &key, &path, 0, 0);
7954         if (ret < 0)
7955                 goto out;
7956         while(1) {
7957                 leaf = path.nodes[0];
7958                 slot = path.slots[0];
7959                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
7960                         ret = btrfs_next_leaf(root, &path);
7961                         if (ret != 0)
7962                                 break;
7963                         leaf = path.nodes[0];
7964                         slot = path.slots[0];
7965                 }
7966                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
7967                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
7968                         unsigned long offset;
7969                         u64 last_snapshot;
7970
7971                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7972                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
7973                         last_snapshot = btrfs_root_last_snapshot(&ri);
7974                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
7975                                 level = btrfs_root_level(&ri);
7976                                 level_size = btrfs_level_size(root, level);
7977                                 ret = add_root_item_to_list(&normal_trees,
7978                                                 found_key.objectid,
7979                                                 btrfs_root_bytenr(&ri),
7980                                                 last_snapshot, level,
7981                                                 0, level_size, NULL);
7982                                 if (ret < 0)
7983                                         goto out;
7984                         } else {
7985                                 level = btrfs_root_level(&ri);
7986                                 level_size = btrfs_level_size(root, level);
7987                                 objectid = found_key.objectid;
7988                                 btrfs_disk_key_to_cpu(&found_key,
7989                                                       &ri.drop_progress);
7990                                 ret = add_root_item_to_list(&dropping_trees,
7991                                                 objectid,
7992                                                 btrfs_root_bytenr(&ri),
7993                                                 last_snapshot, level,
7994                                                 ri.drop_level,
7995                                                 level_size, &found_key);
7996                                 if (ret < 0)
7997                                         goto out;
7998                         }
7999                 }
8000                 path.slots[0]++;
8001         }
8002         btrfs_release_path(&path);
8003
8004         /*
8005          * check_block can return -EAGAIN if it fixes something, please keep
8006          * this in mind when dealing with return values from these functions, if
8007          * we get -EAGAIN we want to fall through and restart the loop.
8008          */
8009         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8010                                   &seen, &reada, &nodes, &extent_cache,
8011                                   &chunk_cache, &dev_cache, &block_group_cache,
8012                                   &dev_extent_cache);
8013         if (ret < 0) {
8014                 if (ret == -EAGAIN)
8015                         goto loop;
8016                 goto out;
8017         }
8018         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8019                                   &pending, &seen, &reada, &nodes,
8020                                   &extent_cache, &chunk_cache, &dev_cache,
8021                                   &block_group_cache, &dev_extent_cache);
8022         if (ret < 0) {
8023                 if (ret == -EAGAIN)
8024                         goto loop;
8025                 goto out;
8026         }
8027
8028         err = check_chunks(&chunk_cache, &block_group_cache,
8029                            &dev_extent_cache, NULL, NULL, NULL, 0);
8030         if (err) {
8031                 if (err == -EAGAIN)
8032                         goto loop;
8033                 if (!ret)
8034                         ret = err;
8035         }
8036
8037         ret = check_extent_refs(root, &extent_cache);
8038         if (ret < 0) {
8039                 if (ret == -EAGAIN)
8040                         goto loop;
8041                 goto out;
8042         }
8043
8044         err = check_devices(&dev_cache, &dev_extent_cache);
8045         if (err && !ret)
8046                 ret = err;
8047
8048 out:
8049         if (repair) {
8050                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8051                 extent_io_tree_cleanup(&excluded_extents);
8052                 root->fs_info->fsck_extent_cache = NULL;
8053                 root->fs_info->free_extent_hook = NULL;
8054                 root->fs_info->corrupt_blocks = NULL;
8055                 root->fs_info->excluded_extents = NULL;
8056         }
8057         free(bits);
8058         free_chunk_cache_tree(&chunk_cache);
8059         free_device_cache_tree(&dev_cache);
8060         free_block_group_tree(&block_group_cache);
8061         free_device_extent_tree(&dev_extent_cache);
8062         free_extent_cache_tree(&seen);
8063         free_extent_cache_tree(&pending);
8064         free_extent_cache_tree(&reada);
8065         free_extent_cache_tree(&nodes);
8066         return ret;
8067 loop:
8068         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8069         free_extent_cache_tree(&seen);
8070         free_extent_cache_tree(&pending);
8071         free_extent_cache_tree(&reada);
8072         free_extent_cache_tree(&nodes);
8073         free_chunk_cache_tree(&chunk_cache);
8074         free_block_group_tree(&block_group_cache);
8075         free_device_cache_tree(&dev_cache);
8076         free_device_extent_tree(&dev_extent_cache);
8077         free_extent_record_cache(root->fs_info, &extent_cache);
8078         free_root_item_list(&normal_trees);
8079         free_root_item_list(&dropping_trees);
8080         extent_io_tree_cleanup(&excluded_extents);
8081         goto again;
8082 }
8083
8084 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8085                            struct btrfs_root *root, int overwrite)
8086 {
8087         struct extent_buffer *c;
8088         struct extent_buffer *old = root->node;
8089         int level;
8090         int ret;
8091         struct btrfs_disk_key disk_key = {0,0,0};
8092
8093         level = 0;
8094
8095         if (overwrite) {
8096                 c = old;
8097                 extent_buffer_get(c);
8098                 goto init;
8099         }
8100         c = btrfs_alloc_free_block(trans, root,
8101                                    btrfs_level_size(root, 0),
8102                                    root->root_key.objectid,
8103                                    &disk_key, level, 0, 0);
8104         if (IS_ERR(c)) {
8105                 c = old;
8106                 extent_buffer_get(c);
8107                 overwrite = 1;
8108         }
8109 init:
8110         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8111         btrfs_set_header_level(c, level);
8112         btrfs_set_header_bytenr(c, c->start);
8113         btrfs_set_header_generation(c, trans->transid);
8114         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8115         btrfs_set_header_owner(c, root->root_key.objectid);
8116
8117         write_extent_buffer(c, root->fs_info->fsid,
8118                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8119
8120         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8121                             btrfs_header_chunk_tree_uuid(c),
8122                             BTRFS_UUID_SIZE);
8123
8124         btrfs_mark_buffer_dirty(c);
8125         /*
8126          * this case can happen in the following case:
8127          *
8128          * 1.overwrite previous root.
8129          *
8130          * 2.reinit reloc data root, this is because we skip pin
8131          * down reloc data tree before which means we can allocate
8132          * same block bytenr here.
8133          */
8134         if (old->start == c->start) {
8135                 btrfs_set_root_generation(&root->root_item,
8136                                           trans->transid);
8137                 root->root_item.level = btrfs_header_level(root->node);
8138                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8139                                         &root->root_key, &root->root_item);
8140                 if (ret) {
8141                         free_extent_buffer(c);
8142                         return ret;
8143                 }
8144         }
8145         free_extent_buffer(old);
8146         root->node = c;
8147         add_root_to_dirty_list(root);
8148         return 0;
8149 }
8150
8151 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8152                                 struct extent_buffer *eb, int tree_root)
8153 {
8154         struct extent_buffer *tmp;
8155         struct btrfs_root_item *ri;
8156         struct btrfs_key key;
8157         u64 bytenr;
8158         u32 leafsize;
8159         int level = btrfs_header_level(eb);
8160         int nritems;
8161         int ret;
8162         int i;
8163
8164         /*
8165          * If we have pinned this block before, don't pin it again.
8166          * This can not only avoid forever loop with broken filesystem
8167          * but also give us some speedups.
8168          */
8169         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8170                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8171                 return 0;
8172
8173         btrfs_pin_extent(fs_info, eb->start, eb->len);
8174
8175         leafsize = btrfs_super_leafsize(fs_info->super_copy);
8176         nritems = btrfs_header_nritems(eb);
8177         for (i = 0; i < nritems; i++) {
8178                 if (level == 0) {
8179                         btrfs_item_key_to_cpu(eb, &key, i);
8180                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8181                                 continue;
8182                         /* Skip the extent root and reloc roots */
8183                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8184                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8185                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8186                                 continue;
8187                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8188                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8189
8190                         /*
8191                          * If at any point we start needing the real root we
8192                          * will have to build a stump root for the root we are
8193                          * in, but for now this doesn't actually use the root so
8194                          * just pass in extent_root.
8195                          */
8196                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8197                                               leafsize, 0);
8198                         if (!extent_buffer_uptodate(tmp)) {
8199                                 fprintf(stderr, "Error reading root block\n");
8200                                 return -EIO;
8201                         }
8202                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8203                         free_extent_buffer(tmp);
8204                         if (ret)
8205                                 return ret;
8206                 } else {
8207                         bytenr = btrfs_node_blockptr(eb, i);
8208
8209                         /* If we aren't the tree root don't read the block */
8210                         if (level == 1 && !tree_root) {
8211                                 btrfs_pin_extent(fs_info, bytenr, leafsize);
8212                                 continue;
8213                         }
8214
8215                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8216                                               leafsize, 0);
8217                         if (!extent_buffer_uptodate(tmp)) {
8218                                 fprintf(stderr, "Error reading tree block\n");
8219                                 return -EIO;
8220                         }
8221                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8222                         free_extent_buffer(tmp);
8223                         if (ret)
8224                                 return ret;
8225                 }
8226         }
8227
8228         return 0;
8229 }
8230
8231 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8232 {
8233         int ret;
8234
8235         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8236         if (ret)
8237                 return ret;
8238
8239         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8240 }
8241
8242 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8243 {
8244         struct btrfs_block_group_cache *cache;
8245         struct btrfs_path *path;
8246         struct extent_buffer *leaf;
8247         struct btrfs_chunk *chunk;
8248         struct btrfs_key key;
8249         int ret;
8250         u64 start;
8251
8252         path = btrfs_alloc_path();
8253         if (!path)
8254                 return -ENOMEM;
8255
8256         key.objectid = 0;
8257         key.type = BTRFS_CHUNK_ITEM_KEY;
8258         key.offset = 0;
8259
8260         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8261         if (ret < 0) {
8262                 btrfs_free_path(path);
8263                 return ret;
8264         }
8265
8266         /*
8267          * We do this in case the block groups were screwed up and had alloc
8268          * bits that aren't actually set on the chunks.  This happens with
8269          * restored images every time and could happen in real life I guess.
8270          */
8271         fs_info->avail_data_alloc_bits = 0;
8272         fs_info->avail_metadata_alloc_bits = 0;
8273         fs_info->avail_system_alloc_bits = 0;
8274
8275         /* First we need to create the in-memory block groups */
8276         while (1) {
8277                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8278                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
8279                         if (ret < 0) {
8280                                 btrfs_free_path(path);
8281                                 return ret;
8282                         }
8283                         if (ret) {
8284                                 ret = 0;
8285                                 break;
8286                         }
8287                 }
8288                 leaf = path->nodes[0];
8289                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8290                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8291                         path->slots[0]++;
8292                         continue;
8293                 }
8294
8295                 chunk = btrfs_item_ptr(leaf, path->slots[0],
8296                                        struct btrfs_chunk);
8297                 btrfs_add_block_group(fs_info, 0,
8298                                       btrfs_chunk_type(leaf, chunk),
8299                                       key.objectid, key.offset,
8300                                       btrfs_chunk_length(leaf, chunk));
8301                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8302                                  key.offset + btrfs_chunk_length(leaf, chunk),
8303                                  GFP_NOFS);
8304                 path->slots[0]++;
8305         }
8306         start = 0;
8307         while (1) {
8308                 cache = btrfs_lookup_first_block_group(fs_info, start);
8309                 if (!cache)
8310                         break;
8311                 cache->cached = 1;
8312                 start = cache->key.objectid + cache->key.offset;
8313         }
8314
8315         btrfs_free_path(path);
8316         return 0;
8317 }
8318
8319 static int reset_balance(struct btrfs_trans_handle *trans,
8320                          struct btrfs_fs_info *fs_info)
8321 {
8322         struct btrfs_root *root = fs_info->tree_root;
8323         struct btrfs_path *path;
8324         struct extent_buffer *leaf;
8325         struct btrfs_key key;
8326         int del_slot, del_nr = 0;
8327         int ret;
8328         int found = 0;
8329
8330         path = btrfs_alloc_path();
8331         if (!path)
8332                 return -ENOMEM;
8333
8334         key.objectid = BTRFS_BALANCE_OBJECTID;
8335         key.type = BTRFS_BALANCE_ITEM_KEY;
8336         key.offset = 0;
8337
8338         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8339         if (ret) {
8340                 if (ret > 0)
8341                         ret = 0;
8342                 if (!ret)
8343                         goto reinit_data_reloc;
8344                 else
8345                         goto out;
8346         }
8347
8348         ret = btrfs_del_item(trans, root, path);
8349         if (ret)
8350                 goto out;
8351         btrfs_release_path(path);
8352
8353         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8354         key.type = BTRFS_ROOT_ITEM_KEY;
8355         key.offset = 0;
8356
8357         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8358         if (ret < 0)
8359                 goto out;
8360         while (1) {
8361                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8362                         if (!found)
8363                                 break;
8364
8365                         if (del_nr) {
8366                                 ret = btrfs_del_items(trans, root, path,
8367                                                       del_slot, del_nr);
8368                                 del_nr = 0;
8369                                 if (ret)
8370                                         goto out;
8371                         }
8372                         key.offset++;
8373                         btrfs_release_path(path);
8374
8375                         found = 0;
8376                         ret = btrfs_search_slot(trans, root, &key, path,
8377                                                 -1, 1);
8378                         if (ret < 0)
8379                                 goto out;
8380                         continue;
8381                 }
8382                 found = 1;
8383                 leaf = path->nodes[0];
8384                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8385                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8386                         break;
8387                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8388                         path->slots[0]++;
8389                         continue;
8390                 }
8391                 if (!del_nr) {
8392                         del_slot = path->slots[0];
8393                         del_nr = 1;
8394                 } else {
8395                         del_nr++;
8396                 }
8397                 path->slots[0]++;
8398         }
8399
8400         if (del_nr) {
8401                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8402                 if (ret)
8403                         goto out;
8404         }
8405         btrfs_release_path(path);
8406
8407 reinit_data_reloc:
8408         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8409         key.type = BTRFS_ROOT_ITEM_KEY;
8410         key.offset = (u64)-1;
8411         root = btrfs_read_fs_root(fs_info, &key);
8412         if (IS_ERR(root)) {
8413                 fprintf(stderr, "Error reading data reloc tree\n");
8414                 ret = PTR_ERR(root);
8415                 goto out;
8416         }
8417         record_root_in_trans(trans, root);
8418         ret = btrfs_fsck_reinit_root(trans, root, 0);
8419         if (ret)
8420                 goto out;
8421         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8422 out:
8423         btrfs_free_path(path);
8424         return ret;
8425 }
8426
8427 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8428                               struct btrfs_fs_info *fs_info)
8429 {
8430         u64 start = 0;
8431         int ret;
8432
8433         /*
8434          * The only reason we don't do this is because right now we're just
8435          * walking the trees we find and pinning down their bytes, we don't look
8436          * at any of the leaves.  In order to do mixed groups we'd have to check
8437          * the leaves of any fs roots and pin down the bytes for any file
8438          * extents we find.  Not hard but why do it if we don't have to?
8439          */
8440         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8441                 fprintf(stderr, "We don't support re-initing the extent tree "
8442                         "for mixed block groups yet, please notify a btrfs "
8443                         "developer you want to do this so they can add this "
8444                         "functionality.\n");
8445                 return -EINVAL;
8446         }
8447
8448         /*
8449          * first we need to walk all of the trees except the extent tree and pin
8450          * down the bytes that are in use so we don't overwrite any existing
8451          * metadata.
8452          */
8453         ret = pin_metadata_blocks(fs_info);
8454         if (ret) {
8455                 fprintf(stderr, "error pinning down used bytes\n");
8456                 return ret;
8457         }
8458
8459         /*
8460          * Need to drop all the block groups since we're going to recreate all
8461          * of them again.
8462          */
8463         btrfs_free_block_groups(fs_info);
8464         ret = reset_block_groups(fs_info);
8465         if (ret) {
8466                 fprintf(stderr, "error resetting the block groups\n");
8467                 return ret;
8468         }
8469
8470         /* Ok we can allocate now, reinit the extent root */
8471         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8472         if (ret) {
8473                 fprintf(stderr, "extent root initialization failed\n");
8474                 /*
8475                  * When the transaction code is updated we should end the
8476                  * transaction, but for now progs only knows about commit so
8477                  * just return an error.
8478                  */
8479                 return ret;
8480         }
8481
8482         /*
8483          * Now we have all the in-memory block groups setup so we can make
8484          * allocations properly, and the metadata we care about is safe since we
8485          * pinned all of it above.
8486          */
8487         while (1) {
8488                 struct btrfs_block_group_cache *cache;
8489
8490                 cache = btrfs_lookup_first_block_group(fs_info, start);
8491                 if (!cache)
8492                         break;
8493                 start = cache->key.objectid + cache->key.offset;
8494                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8495                                         &cache->key, &cache->item,
8496                                         sizeof(cache->item));
8497                 if (ret) {
8498                         fprintf(stderr, "Error adding block group\n");
8499                         return ret;
8500                 }
8501                 btrfs_extent_post_op(trans, fs_info->extent_root);
8502         }
8503
8504         ret = reset_balance(trans, fs_info);
8505         if (ret)
8506                 fprintf(stderr, "error reseting the pending balance\n");
8507
8508         return ret;
8509 }
8510
8511 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8512 {
8513         struct btrfs_path *path;
8514         struct btrfs_trans_handle *trans;
8515         struct btrfs_key key;
8516         int ret;
8517
8518         printf("Recowing metadata block %llu\n", eb->start);
8519         key.objectid = btrfs_header_owner(eb);
8520         key.type = BTRFS_ROOT_ITEM_KEY;
8521         key.offset = (u64)-1;
8522
8523         root = btrfs_read_fs_root(root->fs_info, &key);
8524         if (IS_ERR(root)) {
8525                 fprintf(stderr, "Couldn't find owner root %llu\n",
8526                         key.objectid);
8527                 return PTR_ERR(root);
8528         }
8529
8530         path = btrfs_alloc_path();
8531         if (!path)
8532                 return -ENOMEM;
8533
8534         trans = btrfs_start_transaction(root, 1);
8535         if (IS_ERR(trans)) {
8536                 btrfs_free_path(path);
8537                 return PTR_ERR(trans);
8538         }
8539
8540         path->lowest_level = btrfs_header_level(eb);
8541         if (path->lowest_level)
8542                 btrfs_node_key_to_cpu(eb, &key, 0);
8543         else
8544                 btrfs_item_key_to_cpu(eb, &key, 0);
8545
8546         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8547         btrfs_commit_transaction(trans, root);
8548         btrfs_free_path(path);
8549         return ret;
8550 }
8551
8552 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8553 {
8554         struct btrfs_path *path;
8555         struct btrfs_trans_handle *trans;
8556         struct btrfs_key key;
8557         int ret;
8558
8559         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8560                bad->key.type, bad->key.offset);
8561         key.objectid = bad->root_id;
8562         key.type = BTRFS_ROOT_ITEM_KEY;
8563         key.offset = (u64)-1;
8564
8565         root = btrfs_read_fs_root(root->fs_info, &key);
8566         if (IS_ERR(root)) {
8567                 fprintf(stderr, "Couldn't find owner root %llu\n",
8568                         key.objectid);
8569                 return PTR_ERR(root);
8570         }
8571
8572         path = btrfs_alloc_path();
8573         if (!path)
8574                 return -ENOMEM;
8575
8576         trans = btrfs_start_transaction(root, 1);
8577         if (IS_ERR(trans)) {
8578                 btrfs_free_path(path);
8579                 return PTR_ERR(trans);
8580         }
8581
8582         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8583         if (ret) {
8584                 if (ret > 0)
8585                         ret = 0;
8586                 goto out;
8587         }
8588         ret = btrfs_del_item(trans, root, path);
8589 out:
8590         btrfs_commit_transaction(trans, root);
8591         btrfs_free_path(path);
8592         return ret;
8593 }
8594
8595 static int zero_log_tree(struct btrfs_root *root)
8596 {
8597         struct btrfs_trans_handle *trans;
8598         int ret;
8599
8600         trans = btrfs_start_transaction(root, 1);
8601         if (IS_ERR(trans)) {
8602                 ret = PTR_ERR(trans);
8603                 return ret;
8604         }
8605         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8606         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8607         ret = btrfs_commit_transaction(trans, root);
8608         return ret;
8609 }
8610
8611 static int populate_csum(struct btrfs_trans_handle *trans,
8612                          struct btrfs_root *csum_root, char *buf, u64 start,
8613                          u64 len)
8614 {
8615         u64 offset = 0;
8616         u64 sectorsize;
8617         int ret = 0;
8618
8619         while (offset < len) {
8620                 sectorsize = csum_root->sectorsize;
8621                 ret = read_extent_data(csum_root, buf, start + offset,
8622                                        &sectorsize, 0);
8623                 if (ret)
8624                         break;
8625                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8626                                             start + offset, buf, sectorsize);
8627                 if (ret)
8628                         break;
8629                 offset += sectorsize;
8630         }
8631         return ret;
8632 }
8633
8634 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8635                                       struct btrfs_root *csum_root,
8636                                       struct btrfs_root *cur_root)
8637 {
8638         struct btrfs_path *path;
8639         struct btrfs_key key;
8640         struct extent_buffer *node;
8641         struct btrfs_file_extent_item *fi;
8642         char *buf = NULL;
8643         u64 start = 0;
8644         u64 len = 0;
8645         int slot = 0;
8646         int ret = 0;
8647
8648         path = btrfs_alloc_path();
8649         if (!path)
8650                 return -ENOMEM;
8651         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8652         if (!buf) {
8653                 ret = -ENOMEM;
8654                 goto out;
8655         }
8656
8657         key.objectid = 0;
8658         key.offset = 0;
8659         key.type = 0;
8660
8661         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8662         if (ret < 0)
8663                 goto out;
8664         /* Iterate all regular file extents and fill its csum */
8665         while (1) {
8666                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8667
8668                 if (key.type != BTRFS_EXTENT_DATA_KEY)
8669                         goto next;
8670                 node = path->nodes[0];
8671                 slot = path->slots[0];
8672                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8673                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8674                         goto next;
8675                 start = btrfs_file_extent_disk_bytenr(node, fi);
8676                 len = btrfs_file_extent_disk_num_bytes(node, fi);
8677
8678                 ret = populate_csum(trans, csum_root, buf, start, len);
8679                 if (ret == -EEXIST)
8680                         ret = 0;
8681                 if (ret < 0)
8682                         goto out;
8683 next:
8684                 /*
8685                  * TODO: if next leaf is corrupted, jump to nearest next valid
8686                  * leaf.
8687                  */
8688                 ret = btrfs_next_item(cur_root, path);
8689                 if (ret < 0)
8690                         goto out;
8691                 if (ret > 0) {
8692                         ret = 0;
8693                         goto out;
8694                 }
8695         }
8696
8697 out:
8698         btrfs_free_path(path);
8699         free(buf);
8700         return ret;
8701 }
8702
8703 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8704                                   struct btrfs_root *csum_root)
8705 {
8706         struct btrfs_fs_info *fs_info = csum_root->fs_info;
8707         struct btrfs_path *path;
8708         struct btrfs_root *tree_root = fs_info->tree_root;
8709         struct btrfs_root *cur_root;
8710         struct extent_buffer *node;
8711         struct btrfs_key key;
8712         int slot = 0;
8713         int ret = 0;
8714
8715         path = btrfs_alloc_path();
8716         if (!path)
8717                 return -ENOMEM;
8718
8719         key.objectid = BTRFS_FS_TREE_OBJECTID;
8720         key.offset = 0;
8721         key.type = BTRFS_ROOT_ITEM_KEY;
8722
8723         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
8724         if (ret < 0)
8725                 goto out;
8726         if (ret > 0) {
8727                 ret = -ENOENT;
8728                 goto out;
8729         }
8730
8731         while (1) {
8732                 node = path->nodes[0];
8733                 slot = path->slots[0];
8734                 btrfs_item_key_to_cpu(node, &key, slot);
8735                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8736                         goto out;
8737                 if (key.type != BTRFS_ROOT_ITEM_KEY)
8738                         goto next;
8739                 if (!is_fstree(key.objectid))
8740                         goto next;
8741                 key.offset = (u64)-1;
8742
8743                 cur_root = btrfs_read_fs_root(fs_info, &key);
8744                 if (IS_ERR(cur_root) || !cur_root) {
8745                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8746                                 key.objectid);
8747                         goto out;
8748                 }
8749                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8750                                 cur_root);
8751                 if (ret < 0)
8752                         goto out;
8753 next:
8754                 ret = btrfs_next_item(tree_root, path);
8755                 if (ret > 0) {
8756                         ret = 0;
8757                         goto out;
8758                 }
8759                 if (ret < 0)
8760                         goto out;
8761         }
8762
8763 out:
8764         btrfs_free_path(path);
8765         return ret;
8766 }
8767
8768 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
8769                                       struct btrfs_root *csum_root)
8770 {
8771         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
8772         struct btrfs_path *path;
8773         struct btrfs_extent_item *ei;
8774         struct extent_buffer *leaf;
8775         char *buf;
8776         struct btrfs_key key;
8777         int ret;
8778
8779         path = btrfs_alloc_path();
8780         if (!path)
8781                 return -ENOMEM;
8782
8783         key.objectid = 0;
8784         key.type = BTRFS_EXTENT_ITEM_KEY;
8785         key.offset = 0;
8786
8787         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
8788         if (ret < 0) {
8789                 btrfs_free_path(path);
8790                 return ret;
8791         }
8792
8793         buf = malloc(csum_root->sectorsize);
8794         if (!buf) {
8795                 btrfs_free_path(path);
8796                 return -ENOMEM;
8797         }
8798
8799         while (1) {
8800                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8801                         ret = btrfs_next_leaf(extent_root, path);
8802                         if (ret < 0)
8803                                 break;
8804                         if (ret) {
8805                                 ret = 0;
8806                                 break;
8807                         }
8808                 }
8809                 leaf = path->nodes[0];
8810
8811                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8812                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8813                         path->slots[0]++;
8814                         continue;
8815                 }
8816
8817                 ei = btrfs_item_ptr(leaf, path->slots[0],
8818                                     struct btrfs_extent_item);
8819                 if (!(btrfs_extent_flags(leaf, ei) &
8820                       BTRFS_EXTENT_FLAG_DATA)) {
8821                         path->slots[0]++;
8822                         continue;
8823                 }
8824
8825                 ret = populate_csum(trans, csum_root, buf, key.objectid,
8826                                     key.offset);
8827                 if (ret)
8828                         break;
8829                 path->slots[0]++;
8830         }
8831
8832         btrfs_free_path(path);
8833         free(buf);
8834         return ret;
8835 }
8836
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * After an extent tree init the extent info has been wiped, so the extent
 * tree cannot serve as the source of data extents.  In that case the caller
 * sets @search_fs_tree and the fs/subvolume trees are walked instead;
 * otherwise the extent tree is used.
 *
 * Returns whatever the selected helper returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
			      : fill_csum_tree_from_extent(trans, csum_root);
}
8853
/*
 * What the extent tree tells us about the root node of one tree.  Entries
 * live in roots_info_cache, keyed by root id, and are filled in by
 * build_roots_info_cache().
 */
struct root_item_info {
	/* level of the root; (u8)-1 until the first tree block is recorded */
	u8 level;
	/* number of nodes at this level, must be 1 for a root */
	int node_count;
	/* objectid of the extent item of the highest level block seen */
	u64 bytenr;
	/* generation stored in that extent item */
	u64 gen;
	/* cache linkage: start is the root id, size is always 1 */
	struct cache_extent cache_extent;
};

/*
 * Allocated on first use by build_roots_info_cache() and released by
 * free_roots_info_cache(); NULL while no cache exists.
 */
static struct cache_tree *roots_info_cache = NULL;
8865
8866 static void free_roots_info_cache(void)
8867 {
8868         if (!roots_info_cache)
8869                 return;
8870
8871         while (!cache_tree_empty(roots_info_cache)) {
8872                 struct cache_extent *entry;
8873                 struct root_item_info *rii;
8874
8875                 entry = first_cache_extent(roots_info_cache);
8876                 if (!entry)
8877                         break;
8878                 remove_cache_extent(roots_info_cache, entry);
8879                 rii = container_of(entry, struct root_item_info, cache_extent);
8880                 free(rii);
8881         }
8882
8883         free(roots_info_cache);
8884         roots_info_cache = NULL;
8885 }
8886
8887 static int build_roots_info_cache(struct btrfs_fs_info *info)
8888 {
8889         int ret = 0;
8890         struct btrfs_key key;
8891         struct extent_buffer *leaf;
8892         struct btrfs_path *path;
8893
8894         if (!roots_info_cache) {
8895                 roots_info_cache = malloc(sizeof(*roots_info_cache));
8896                 if (!roots_info_cache)
8897                         return -ENOMEM;
8898                 cache_tree_init(roots_info_cache);
8899         }
8900
8901         path = btrfs_alloc_path();
8902         if (!path)
8903                 return -ENOMEM;
8904
8905         key.objectid = 0;
8906         key.type = BTRFS_EXTENT_ITEM_KEY;
8907         key.offset = 0;
8908
8909         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
8910         if (ret < 0)
8911                 goto out;
8912         leaf = path->nodes[0];
8913
8914         while (1) {
8915                 struct btrfs_key found_key;
8916                 struct btrfs_extent_item *ei;
8917                 struct btrfs_extent_inline_ref *iref;
8918                 int slot = path->slots[0];
8919                 int type;
8920                 u64 flags;
8921                 u64 root_id;
8922                 u8 level;
8923                 struct cache_extent *entry;
8924                 struct root_item_info *rii;
8925
8926                 if (slot >= btrfs_header_nritems(leaf)) {
8927                         ret = btrfs_next_leaf(info->extent_root, path);
8928                         if (ret < 0) {
8929                                 break;
8930                         } else if (ret) {
8931                                 ret = 0;
8932                                 break;
8933                         }
8934                         leaf = path->nodes[0];
8935                         slot = path->slots[0];
8936                 }
8937
8938                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
8939
8940                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8941                     found_key.type != BTRFS_METADATA_ITEM_KEY)
8942                         goto next;
8943
8944                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8945                 flags = btrfs_extent_flags(leaf, ei);
8946
8947                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
8948                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
8949                         goto next;
8950
8951                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
8952                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8953                         level = found_key.offset;
8954                 } else {
8955                         struct btrfs_tree_block_info *info;
8956
8957                         info = (struct btrfs_tree_block_info *)(ei + 1);
8958                         iref = (struct btrfs_extent_inline_ref *)(info + 1);
8959                         level = btrfs_tree_block_level(leaf, info);
8960                 }
8961
8962                 /*
8963                  * For a root extent, it must be of the following type and the
8964                  * first (and only one) iref in the item.
8965                  */
8966                 type = btrfs_extent_inline_ref_type(leaf, iref);
8967                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
8968                         goto next;
8969
8970                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
8971                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
8972                 if (!entry) {
8973                         rii = malloc(sizeof(struct root_item_info));
8974                         if (!rii) {
8975                                 ret = -ENOMEM;
8976                                 goto out;
8977                         }
8978                         rii->cache_extent.start = root_id;
8979                         rii->cache_extent.size = 1;
8980                         rii->level = (u8)-1;
8981                         entry = &rii->cache_extent;
8982                         ret = insert_cache_extent(roots_info_cache, entry);
8983                         ASSERT(ret == 0);
8984                 } else {
8985                         rii = container_of(entry, struct root_item_info,
8986                                            cache_extent);
8987                 }
8988
8989                 ASSERT(rii->cache_extent.start == root_id);
8990                 ASSERT(rii->cache_extent.size == 1);
8991
8992                 if (level > rii->level || rii->level == (u8)-1) {
8993                         rii->level = level;
8994                         rii->bytenr = found_key.objectid;
8995                         rii->gen = btrfs_extent_generation(leaf, ei);
8996                         rii->node_count = 1;
8997                 } else if (level == rii->level) {
8998                         rii->node_count++;
8999                 }
9000 next:
9001                 path->slots[0]++;
9002         }
9003
9004 out:
9005         btrfs_free_path(path);
9006
9007         return ret;
9008 }
9009
9010 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9011                                   struct btrfs_path *path,
9012                                   const struct btrfs_key *root_key,
9013                                   const int read_only_mode)
9014 {
9015         const u64 root_id = root_key->objectid;
9016         struct cache_extent *entry;
9017         struct root_item_info *rii;
9018         struct btrfs_root_item ri;
9019         unsigned long offset;
9020
9021         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9022         if (!entry) {
9023                 fprintf(stderr,
9024                         "Error: could not find extent items for root %llu\n",
9025                         root_key->objectid);
9026                 return -ENOENT;
9027         }
9028
9029         rii = container_of(entry, struct root_item_info, cache_extent);
9030         ASSERT(rii->cache_extent.start == root_id);
9031         ASSERT(rii->cache_extent.size == 1);
9032
9033         if (rii->node_count != 1) {
9034                 fprintf(stderr,
9035                         "Error: could not find btree root extent for root %llu\n",
9036                         root_id);
9037                 return -ENOENT;
9038         }
9039
9040         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9041         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9042
9043         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9044             btrfs_root_level(&ri) != rii->level ||
9045             btrfs_root_generation(&ri) != rii->gen) {
9046
9047                 /*
9048                  * If we're in repair mode but our caller told us to not update
9049                  * the root item, i.e. just check if it needs to be updated, don't
9050                  * print this message, since the caller will call us again shortly
9051                  * for the same root item without read only mode (the caller will
9052                  * open a transaction first).
9053                  */
9054                 if (!(read_only_mode && repair))
9055                         fprintf(stderr,
9056                                 "%sroot item for root %llu,"
9057                                 " current bytenr %llu, current gen %llu, current level %u,"
9058                                 " new bytenr %llu, new gen %llu, new level %u\n",
9059                                 (read_only_mode ? "" : "fixing "),
9060                                 root_id,
9061                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9062                                 btrfs_root_level(&ri),
9063                                 rii->bytenr, rii->gen, rii->level);
9064
9065                 if (btrfs_root_generation(&ri) > rii->gen) {
9066                         fprintf(stderr,
9067                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9068                                 root_id, btrfs_root_generation(&ri), rii->gen);
9069                         return -EINVAL;
9070                 }
9071
9072                 if (!read_only_mode) {
9073                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9074                         btrfs_set_root_level(&ri, rii->level);
9075                         btrfs_set_root_generation(&ri, rii->gen);
9076                         write_extent_buffer(path->nodes[0], &ri,
9077                                             offset, sizeof(ri));
9078                 }
9079
9080                 return 1;
9081         }
9082
9083         return 0;
9084 }
9085
9086 /*
9087  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9088  * caused read-only snapshots to be corrupted if they were created at a moment
9089  * when the source subvolume/snapshot had orphan items. The issue was that the
9090  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9091  * node instead of the post orphan cleanup root node.
9092  * So this function, and its callees, just detects and fixes those cases. Even
9093  * though the regression was for read-only snapshots, this function applies to
9094  * any snapshot/subvolume root.
9095  * This must be run before any other repair code - not doing it so, makes other
9096  * repair code delete or modify backrefs in the extent tree for example, which
9097  * will result in an inconsistent fs after repairing the root items.
9098  */
9099 static int repair_root_items(struct btrfs_fs_info *info)
9100 {
9101         struct btrfs_path *path = NULL;
9102         struct btrfs_key key;
9103         struct extent_buffer *leaf;
9104         struct btrfs_trans_handle *trans = NULL;
9105         int ret = 0;
9106         int bad_roots = 0;
9107         int need_trans = 0;
9108
9109         ret = build_roots_info_cache(info);
9110         if (ret)
9111                 goto out;
9112
9113         path = btrfs_alloc_path();
9114         if (!path) {
9115                 ret = -ENOMEM;
9116                 goto out;
9117         }
9118
9119         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9120         key.type = BTRFS_ROOT_ITEM_KEY;
9121         key.offset = 0;
9122
9123 again:
9124         /*
9125          * Avoid opening and committing transactions if a leaf doesn't have
9126          * any root items that need to be fixed, so that we avoid rotating
9127          * backup roots unnecessarily.
9128          */
9129         if (need_trans) {
9130                 trans = btrfs_start_transaction(info->tree_root, 1);
9131                 if (IS_ERR(trans)) {
9132                         ret = PTR_ERR(trans);
9133                         goto out;
9134                 }
9135         }
9136
9137         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9138                                 0, trans ? 1 : 0);
9139         if (ret < 0)
9140                 goto out;
9141         leaf = path->nodes[0];
9142
9143         while (1) {
9144                 struct btrfs_key found_key;
9145
9146                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9147                         int no_more_keys = find_next_key(path, &key);
9148
9149                         btrfs_release_path(path);
9150                         if (trans) {
9151                                 ret = btrfs_commit_transaction(trans,
9152                                                                info->tree_root);
9153                                 trans = NULL;
9154                                 if (ret < 0)
9155                                         goto out;
9156                         }
9157                         need_trans = 0;
9158                         if (no_more_keys)
9159                                 break;
9160                         goto again;
9161                 }
9162
9163                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9164
9165                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9166                         goto next;
9167                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9168                         goto next;
9169
9170                 ret = maybe_repair_root_item(info, path, &found_key,
9171                                              trans ? 0 : 1);
9172                 if (ret < 0)
9173                         goto out;
9174                 if (ret) {
9175                         if (!trans && repair) {
9176                                 need_trans = 1;
9177                                 key = found_key;
9178                                 btrfs_release_path(path);
9179                                 goto again;
9180                         }
9181                         bad_roots++;
9182                 }
9183 next:
9184                 path->slots[0]++;
9185         }
9186         ret = 0;
9187 out:
9188         free_roots_info_cache();
9189         if (path)
9190                 btrfs_free_path(path);
9191         if (trans)
9192                 btrfs_commit_transaction(trans, info->tree_root);
9193         if (ret < 0)
9194                 return ret;
9195
9196         return bad_roots;
9197 }
9198
/*
 * Usage text for "btrfs check". The array is terminated by a NULL entry;
 * cmd_check() passes it to usage().
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check an unmounted btrfs filesystem.",
	"",
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the backup root copy",
	"--repair                    try to repair the filesystem",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--check-data-csum           verify checksums of data blocks",
	"--qgroup-report             print a report on qgroup consistency",
	"--subvol-extents <subvolid> print subvolume extents and sharing state",
	"--tree-root <bytenr>        use the given bytenr for the tree root",
	NULL
};
9214
9215 int cmd_check(int argc, char **argv)
9216 {
9217         struct cache_tree root_cache;
9218         struct btrfs_root *root;
9219         struct btrfs_fs_info *info;
9220         u64 bytenr = 0;
9221         u64 subvolid = 0;
9222         u64 tree_root_bytenr = 0;
9223         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9224         int ret;
9225         u64 num;
9226         int init_csum_tree = 0;
9227         int readonly = 0;
9228         int qgroup_report = 0;
9229         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9230
9231         while(1) {
9232                 int c;
9233                 enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT,
9234                         OPT_CHECK_CSUM, OPT_READONLY };
9235                 static const struct option long_options[] = {
9236                         { "super", required_argument, NULL, 's' },
9237                         { "repair", no_argument, NULL, OPT_REPAIR },
9238                         { "readonly", no_argument, NULL, OPT_READONLY },
9239                         { "init-csum-tree", no_argument, NULL, OPT_INIT_CSUM },
9240                         { "init-extent-tree", no_argument, NULL, OPT_INIT_EXTENT },
9241                         { "check-data-csum", no_argument, NULL, OPT_CHECK_CSUM },
9242                         { "backup", no_argument, NULL, 'b' },
9243                         { "subvol-extents", required_argument, NULL, 'E' },
9244                         { "qgroup-report", no_argument, NULL, 'Q' },
9245                         { "tree-root", required_argument, NULL, 'r' },
9246                         { NULL, 0, NULL, 0}
9247                 };
9248
9249                 c = getopt_long(argc, argv, "as:br:", long_options, NULL);
9250                 if (c < 0)
9251                         break;
9252                 switch(c) {
9253                         case 'a': /* ignored */ break;
9254                         case 'b':
9255                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9256                                 break;
9257                         case 's':
9258                                 num = arg_strtou64(optarg);
9259                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9260                                         fprintf(stderr,
9261                                                 "ERROR: super mirror should be less than: %d\n",
9262                                                 BTRFS_SUPER_MIRROR_MAX);
9263                                         exit(1);
9264                                 }
9265                                 bytenr = btrfs_sb_offset(((int)num));
9266                                 printf("using SB copy %llu, bytenr %llu\n", num,
9267                                        (unsigned long long)bytenr);
9268                                 break;
9269                         case 'Q':
9270                                 qgroup_report = 1;
9271                                 break;
9272                         case 'E':
9273                                 subvolid = arg_strtou64(optarg);
9274                                 break;
9275                         case 'r':
9276                                 tree_root_bytenr = arg_strtou64(optarg);
9277                                 break;
9278                         case '?':
9279                         case 'h':
9280                                 usage(cmd_check_usage);
9281                         case OPT_REPAIR:
9282                                 printf("enabling repair mode\n");
9283                                 repair = 1;
9284                                 ctree_flags |= OPEN_CTREE_WRITES;
9285                                 break;
9286                         case OPT_READONLY:
9287                                 readonly = 1;
9288                                 break;
9289                         case OPT_INIT_CSUM:
9290                                 printf("Creating a new CRC tree\n");
9291                                 init_csum_tree = 1;
9292                                 repair = 1;
9293                                 ctree_flags |= OPEN_CTREE_WRITES;
9294                                 break;
9295                         case OPT_INIT_EXTENT:
9296                                 init_extent_tree = 1;
9297                                 ctree_flags |= (OPEN_CTREE_WRITES |
9298                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9299                                 repair = 1;
9300                                 break;
9301                         case OPT_CHECK_CSUM:
9302                                 check_data_csum = 1;
9303                                 break;
9304                 }
9305         }
9306         argc = argc - optind;
9307
9308         if (check_argc_exact(argc, 1))
9309                 usage(cmd_check_usage);
9310
9311         /* This check is the only reason for --readonly to exist */
9312         if (readonly && repair) {
9313                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9314                 exit(1);
9315         }
9316
9317         radix_tree_init();
9318         cache_tree_init(&root_cache);
9319
9320         if((ret = check_mounted(argv[optind])) < 0) {
9321                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9322                 goto err_out;
9323         } else if(ret) {
9324                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9325                 ret = -EBUSY;
9326                 goto err_out;
9327         }
9328
9329         /* only allow partial opening under repair mode */
9330         if (repair)
9331                 ctree_flags |= OPEN_CTREE_PARTIAL;
9332
9333         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9334                                   ctree_flags);
9335         if (!info) {
9336                 fprintf(stderr, "Couldn't open file system\n");
9337                 ret = -EIO;
9338                 goto err_out;
9339         }
9340
9341         root = info->fs_root;
9342
9343         /*
9344          * repair mode will force us to commit transaction which
9345          * will make us fail to load log tree when mounting.
9346          */
9347         if (repair && btrfs_super_log_root(info->super_copy)) {
9348                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9349                 if (!ret) {
9350                         ret = 1;
9351                         goto close_out;
9352                 }
9353                 ret = zero_log_tree(root);
9354                 if (ret) {
9355                         fprintf(stderr, "fail to zero log tree\n");
9356                         goto close_out;
9357                 }
9358         }
9359
9360         uuid_unparse(info->super_copy->fsid, uuidbuf);
9361         if (qgroup_report) {
9362                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9363                        uuidbuf);
9364                 ret = qgroup_verify_all(info);
9365                 if (ret == 0)
9366                         print_qgroup_report(1);
9367                 goto close_out;
9368         }
9369         if (subvolid) {
9370                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9371                        subvolid, argv[optind], uuidbuf);
9372                 ret = print_extent_state(info, subvolid);
9373                 goto close_out;
9374         }
9375         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9376
9377         if (!extent_buffer_uptodate(info->tree_root->node) ||
9378             !extent_buffer_uptodate(info->dev_root->node) ||
9379             !extent_buffer_uptodate(info->chunk_root->node)) {
9380                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9381                 ret = -EIO;
9382                 goto close_out;
9383         }
9384
9385         if (init_extent_tree || init_csum_tree) {
9386                 struct btrfs_trans_handle *trans;
9387
9388                 trans = btrfs_start_transaction(info->extent_root, 0);
9389                 if (IS_ERR(trans)) {
9390                         fprintf(stderr, "Error starting transaction\n");
9391                         ret = PTR_ERR(trans);
9392                         goto close_out;
9393                 }
9394
9395                 if (init_extent_tree) {
9396                         printf("Creating a new extent tree\n");
9397                         ret = reinit_extent_tree(trans, info);
9398                         if (ret)
9399                                 goto close_out;
9400                 }
9401
9402                 if (init_csum_tree) {
9403                         fprintf(stderr, "Reinit crc root\n");
9404                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9405                         if (ret) {
9406                                 fprintf(stderr, "crc root initialization failed\n");
9407                                 ret = -EIO;
9408                                 goto close_out;
9409                         }
9410
9411                         ret = fill_csum_tree(trans, info->csum_root,
9412                                              init_extent_tree);
9413                         if (ret) {
9414                                 fprintf(stderr, "crc refilling failed\n");
9415                                 return -EIO;
9416                         }
9417                 }
9418                 /*
9419                  * Ok now we commit and run the normal fsck, which will add
9420                  * extent entries for all of the items it finds.
9421                  */
9422                 ret = btrfs_commit_transaction(trans, info->extent_root);
9423                 if (ret)
9424                         goto close_out;
9425         }
9426         if (!extent_buffer_uptodate(info->extent_root->node)) {
9427                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9428                 ret = -EIO;
9429                 goto close_out;
9430         }
9431         if (!extent_buffer_uptodate(info->csum_root->node)) {
9432                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9433                 ret = -EIO;
9434                 goto close_out;
9435         }
9436
9437         fprintf(stderr, "checking extents\n");
9438         ret = check_chunks_and_extents(root);
9439         if (ret)
9440                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9441
9442         ret = repair_root_items(info);
9443         if (ret < 0)
9444                 goto close_out;
9445         if (repair) {
9446                 fprintf(stderr, "Fixed %d roots.\n", ret);
9447                 ret = 0;
9448         } else if (ret > 0) {
9449                 fprintf(stderr,
9450                        "Found %d roots with an outdated root item.\n",
9451                        ret);
9452                 fprintf(stderr,
9453                         "Please run a filesystem check with the option --repair to fix them.\n");
9454                 ret = 1;
9455                 goto close_out;
9456         }
9457
9458         fprintf(stderr, "checking free space cache\n");
9459         ret = check_space_cache(root);
9460         if (ret)
9461                 goto out;
9462
9463         /*
9464          * We used to have to have these hole extents in between our real
9465          * extents so if we don't have this flag set we need to make sure there
9466          * are no gaps in the file extents for inodes, otherwise we can just
9467          * ignore it when this happens.
9468          */
9469         no_holes = btrfs_fs_incompat(root->fs_info,
9470                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9471         fprintf(stderr, "checking fs roots\n");
9472         ret = check_fs_roots(root, &root_cache);
9473         if (ret)
9474                 goto out;
9475
9476         fprintf(stderr, "checking csums\n");
9477         ret = check_csums(root);
9478         if (ret)
9479                 goto out;
9480
9481         fprintf(stderr, "checking root refs\n");
9482         ret = check_root_refs(root, &root_cache);
9483         if (ret)
9484                 goto out;
9485
9486         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9487                 struct extent_buffer *eb;
9488
9489                 eb = list_first_entry(&root->fs_info->recow_ebs,
9490                                       struct extent_buffer, recow);
9491                 list_del_init(&eb->recow);
9492                 ret = recow_extent_buffer(root, eb);
9493                 if (ret)
9494                         break;
9495         }
9496
9497         while (!list_empty(&delete_items)) {
9498                 struct bad_item *bad;
9499
9500                 bad = list_first_entry(&delete_items, struct bad_item, list);
9501                 list_del_init(&bad->list);
9502                 if (repair)
9503                         ret = delete_bad_item(root, bad);
9504                 free(bad);
9505         }
9506
9507         if (info->quota_enabled) {
9508                 int err;
9509                 fprintf(stderr, "checking quota groups\n");
9510                 err = qgroup_verify_all(info);
9511                 if (err)
9512                         goto out;
9513         }
9514
9515         if (!list_empty(&root->fs_info->recow_ebs)) {
9516                 fprintf(stderr, "Transid errors in file system\n");
9517                 ret = 1;
9518         }
9519 out:
9520         print_qgroup_report(0);
9521         if (found_old_backref) { /*
9522                  * there was a disk format change when mixed
9523                  * backref was in testing tree. The old format
9524                  * existed about one week.
9525                  */
9526                 printf("\n * Found old mixed backref format. "
9527                        "The old format is not supported! *"
9528                        "\n * Please mount the FS in readonly mode, "
9529                        "backup data and re-format the FS. *\n\n");
9530                 ret = 1;
9531         }
9532         printf("found %llu bytes used err is %d\n",
9533                (unsigned long long)bytes_used, ret);
9534         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9535         printf("total tree bytes: %llu\n",
9536                (unsigned long long)total_btree_bytes);
9537         printf("total fs tree bytes: %llu\n",
9538                (unsigned long long)total_fs_tree_bytes);
9539         printf("total extent tree bytes: %llu\n",
9540                (unsigned long long)total_extent_tree_bytes);
9541         printf("btree space waste bytes: %llu\n",
9542                (unsigned long long)btree_space_waste);
9543         printf("file data blocks allocated: %llu\n referenced %llu\n",
9544                 (unsigned long long)data_bytes_allocated,
9545                 (unsigned long long)data_bytes_referenced);
9546         printf("%s\n", PACKAGE_STRING);
9547
9548         free_root_recs_tree(&root_cache);
9549 close_out:
9550         close_ctree(root);
9551 err_out:
9552         return ret;
9553 }