Revert "btrfs-progs: check: switch to iterating over the backref_tree"
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
/*
 * Phase of the check that is currently running.  print_status_check()
 * uses the value as an index into its table of status strings.
 */
enum task_position {
	TASK_EXTENTS = 0,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	/* Sentinel meaning "nothing to report"; must stay the last element. */
	TASK_NOTHING,
};
51
52 struct task_ctx {
53         int progress_enabled;
54         enum task_position tp;
55
56         struct task_info *info;
57 };
58
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
76
/* Checker implementation selected for this run. */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL = 0,
	CHECK_MODE_LOWMEM,
	/* Returned by parse_check_mode() for an unrecognized name. */
	CHECK_MODE_UNKNOWN,
	/* Alias: the mode used when none is requested. */
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
85
86 struct extent_backref {
87         struct list_head list;
88         struct rb_node node;
89         unsigned int is_data:1;
90         unsigned int found_extent_tree:1;
91         unsigned int full_backref:1;
92         unsigned int found_ref:1;
93         unsigned int broken:1;
94 };
95
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
97 {
98         return list_entry(entry, struct extent_backref, list);
99 }
100
101 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
102 {
103         return rb_entry(node, struct extent_backref, node);
104 }
105
106 struct data_backref {
107         struct extent_backref node;
108         union {
109                 u64 parent;
110                 u64 root;
111         };
112         u64 owner;
113         u64 offset;
114         u64 disk_bytenr;
115         u64 bytes;
116         u64 ram_bytes;
117         u32 num_refs;
118         u32 found_ref;
119 };
120
121 static inline struct data_backref* to_data_backref(struct extent_backref *back)
122 {
123         return container_of(back, struct data_backref, node);
124 }
125
126 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
127 {
128         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
129         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
130         struct data_backref *back1 = to_data_backref(ext1);
131         struct data_backref *back2 = to_data_backref(ext2);
132
133         WARN_ON(!ext1->is_data);
134         WARN_ON(!ext2->is_data);
135
136         /* parent and root are a union, so this covers both */
137         if (back1->parent > back2->parent)
138                 return 1;
139         if (back1->parent < back2->parent)
140                 return -1;
141
142         /* This is a full backref and the parents match. */
143         if (back1->node.full_backref)
144                 return 0;
145
146         if (back1->owner > back2->owner)
147                 return 1;
148         if (back1->owner < back2->owner)
149                 return -1;
150
151         if (back1->offset > back2->offset)
152                 return 1;
153         if (back1->offset < back2->offset)
154                 return -1;
155
156         if (back1->bytes > back2->bytes)
157                 return 1;
158         if (back1->bytes < back2->bytes)
159                 return -1;
160
161         if (back1->found_ref && back2->found_ref) {
162                 if (back1->disk_bytenr > back2->disk_bytenr)
163                         return 1;
164                 if (back1->disk_bytenr < back2->disk_bytenr)
165                         return -1;
166
167                 if (back1->found_ref > back2->found_ref)
168                         return 1;
169                 if (back1->found_ref < back2->found_ref)
170                         return -1;
171         }
172
173         return 0;
174 }
175
176 /*
177  * Much like data_backref, just removed the undetermined members
178  * and change it to use list_head.
179  * During extent scan, it is stored in root->orphan_data_extent.
180  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
181  */
182 struct orphan_data_extent {
183         struct list_head list;
184         u64 root;
185         u64 objectid;
186         u64 offset;
187         u64 disk_bytenr;
188         u64 disk_len;
189 };
190
191 struct tree_backref {
192         struct extent_backref node;
193         union {
194                 u64 parent;
195                 u64 root;
196         };
197 };
198
199 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
200 {
201         return container_of(back, struct tree_backref, node);
202 }
203
204 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
205 {
206         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
207         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
208         struct tree_backref *back1 = to_tree_backref(ext1);
209         struct tree_backref *back2 = to_tree_backref(ext2);
210
211         WARN_ON(ext1->is_data);
212         WARN_ON(ext2->is_data);
213
214         /* parent and root are a union, so this covers both */
215         if (back1->parent > back2->parent)
216                 return 1;
217         if (back1->parent < back2->parent)
218                 return -1;
219
220         return 0;
221 }
222
223 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
224 {
225         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
226         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
227
228         if (ext1->is_data > ext2->is_data)
229                 return 1;
230
231         if (ext1->is_data < ext2->is_data)
232                 return -1;
233
234         if (ext1->full_backref > ext2->full_backref)
235                 return 1;
236         if (ext1->full_backref < ext2->full_backref)
237                 return -1;
238
239         if (ext1->is_data)
240                 return compare_data_backref(node1, node2);
241         else
242                 return compare_tree_backref(node1, node2);
243 }
244
245 /* Explicit initialization for extent_record::flag_block_full_backref */
246 enum { FLAG_UNSET = 2 };
247
248 struct extent_record {
249         struct list_head backrefs;
250         struct list_head dups;
251         struct rb_root backref_tree;
252         struct list_head list;
253         struct cache_extent cache;
254         struct btrfs_disk_key parent_key;
255         u64 start;
256         u64 max_size;
257         u64 nr;
258         u64 refs;
259         u64 extent_item_refs;
260         u64 generation;
261         u64 parent_generation;
262         u64 info_objectid;
263         u32 num_duplicates;
264         u8 info_level;
265         unsigned int flag_block_full_backref:2;
266         unsigned int found_rec:1;
267         unsigned int content_checked:1;
268         unsigned int owner_ref_checked:1;
269         unsigned int is_root:1;
270         unsigned int metadata:1;
271         unsigned int bad_full_backref:1;
272         unsigned int crossing_stripes:1;
273         unsigned int wrong_chunk_type:1;
274 };
275
276 static inline struct extent_record* to_extent_record(struct list_head *entry)
277 {
278         return container_of(entry, struct extent_record, list);
279 }
280
281 struct inode_backref {
282         struct list_head list;
283         unsigned int found_dir_item:1;
284         unsigned int found_dir_index:1;
285         unsigned int found_inode_ref:1;
286         unsigned int filetype:8;
287         int errors;
288         unsigned int ref_type;
289         u64 dir;
290         u64 index;
291         u16 namelen;
292         char name[0];
293 };
294
295 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
296 {
297         return list_entry(entry, struct inode_backref, list);
298 }
299
300 struct root_item_record {
301         struct list_head list;
302         u64 objectid;
303         u64 bytenr;
304         u64 last_snapshot;
305         u8 level;
306         u8 drop_level;
307         int level_size;
308         struct btrfs_key drop_key;
309 };
310
/*
 * Reference error bits, decoded into text by print_ref_error().
 * Written in hex, one bit per flag.
 */
#define REF_ERR_NO_DIR_ITEM		0x0001	/* bit 0 */
#define REF_ERR_NO_DIR_INDEX		0x0002	/* bit 1 */
#define REF_ERR_NO_INODE_REF		0x0004	/* bit 2 */
#define REF_ERR_DUP_DIR_ITEM		0x0008	/* bit 3 */
#define REF_ERR_DUP_DIR_INDEX		0x0010	/* bit 4 */
#define REF_ERR_DUP_INODE_REF		0x0020	/* bit 5 */
#define REF_ERR_INDEX_UNMATCH		0x0040	/* bit 6 */
#define REF_ERR_FILETYPE_UNMATCH	0x0080	/* bit 7 */
#define REF_ERR_NAME_TOO_LONG		0x0100	/* bit 8 */
#define REF_ERR_NO_ROOT_REF		0x0200	/* bit 9 */
#define REF_ERR_NO_ROOT_BACKREF		0x0400	/* bit 10 */
#define REF_ERR_DUP_ROOT_REF		0x0800	/* bit 11 */
#define REF_ERR_DUP_ROOT_BACKREF	0x1000	/* bit 12 */
324
325 struct file_extent_hole {
326         struct rb_node node;
327         u64 start;
328         u64 len;
329 };
330
331 struct inode_record {
332         struct list_head backrefs;
333         unsigned int checked:1;
334         unsigned int merging:1;
335         unsigned int found_inode_item:1;
336         unsigned int found_dir_item:1;
337         unsigned int found_file_extent:1;
338         unsigned int found_csum_item:1;
339         unsigned int some_csum_missing:1;
340         unsigned int nodatasum:1;
341         int errors;
342
343         u64 ino;
344         u32 nlink;
345         u32 imode;
346         u64 isize;
347         u64 nbytes;
348
349         u32 found_link;
350         u64 found_size;
351         u64 extent_start;
352         u64 extent_end;
353         struct rb_root holes;
354         struct list_head orphan_extents;
355
356         u32 refs;
357 };
358
/*
 * Inode error bits stored in inode_record::errors.  print_inode_error()
 * prints the mask with %x, so the values are written in hex here.
 */
#define I_ERR_NO_INODE_ITEM		0x0001	/* bit 0 */
#define I_ERR_NO_ORPHAN_ITEM		0x0002	/* bit 1 */
#define I_ERR_DUP_INODE_ITEM		0x0004	/* bit 2 */
#define I_ERR_DUP_DIR_INDEX		0x0008	/* bit 3 */
#define I_ERR_ODD_DIR_ITEM		0x0010	/* bit 4 */
#define I_ERR_ODD_FILE_EXTENT		0x0020	/* bit 5 */
#define I_ERR_BAD_FILE_EXTENT		0x0040	/* bit 6 */
#define I_ERR_FILE_EXTENT_OVERLAP	0x0080	/* bit 7 */
#define I_ERR_FILE_EXTENT_DISCOUNT	0x0100	/* bit 8 */
#define I_ERR_DIR_ISIZE_WRONG		0x0200	/* bit 9 */
#define I_ERR_FILE_NBYTES_WRONG		0x0400	/* bit 10 */
#define I_ERR_ODD_CSUM_ITEM		0x0800	/* bit 11 */
#define I_ERR_SOME_CSUM_MISSING		0x1000	/* bit 12 */
#define I_ERR_LINK_COUNT_WRONG		0x2000	/* bit 13 */
#define I_ERR_FILE_EXTENT_ORPHAN	0x4000	/* bit 14 */
374
375 struct root_backref {
376         struct list_head list;
377         unsigned int found_dir_item:1;
378         unsigned int found_dir_index:1;
379         unsigned int found_back_ref:1;
380         unsigned int found_forward_ref:1;
381         unsigned int reachable:1;
382         int errors;
383         u64 ref_root;
384         u64 dir;
385         u64 index;
386         u16 namelen;
387         char name[0];
388 };
389
390 static inline struct root_backref* to_root_backref(struct list_head *entry)
391 {
392         return list_entry(entry, struct root_backref, list);
393 }
394
395 struct root_record {
396         struct list_head backrefs;
397         struct cache_extent cache;
398         unsigned int found_root_item:1;
399         u64 objectid;
400         u32 found_ref;
401 };
402
403 struct ptr_node {
404         struct cache_extent cache;
405         void *data;
406 };
407
408 struct shared_node {
409         struct cache_extent cache;
410         struct cache_tree root_cache;
411         struct cache_tree inode_cache;
412         struct inode_record *current;
413         u32 refs;
414 };
415
416 struct block_info {
417         u64 start;
418         u32 size;
419 };
420
421 struct walk_control {
422         struct cache_tree shared;
423         struct shared_node *nodes[BTRFS_MAX_LEVEL];
424         int active_node;
425         int root_level;
426 };
427
428 struct bad_item {
429         struct btrfs_key key;
430         u64 root_id;
431         struct list_head list;
432 };
433
434 struct extent_entry {
435         u64 bytenr;
436         u64 bytes;
437         int count;
438         int broken;
439         struct list_head list;
440 };
441
442 struct root_item_info {
443         /* level of the root */
444         u8 level;
445         /* number of nodes at this level, must be 1 for a root */
446         int node_count;
447         u64 bytenr;
448         u64 gen;
449         struct cache_extent cache_extent;
450 };
451
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual bits yet; they are only
 * used internally for error classification.
 *
 * Every flag owns a distinct bit.  CROSSING_STRIPE_BOUNDARY used to share
 * (1 << 4) with REFERENCER_MISMATCH, which made the two errors impossible
 * to tell apart; it now uses the first free bit.  All other values are
 * unchanged.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
468
469 static void *print_status_check(void *p)
470 {
471         struct task_ctx *priv = p;
472         const char work_indicator[] = { '.', 'o', 'O', 'o' };
473         uint32_t count = 0;
474         static char *task_position_string[] = {
475                 "checking extents",
476                 "checking free space cache",
477                 "checking fs roots",
478         };
479
480         task_period_start(priv->info, 1000 /* 1s */);
481
482         if (priv->tp == TASK_NOTHING)
483                 return NULL;
484
485         while (1) {
486                 printf("%s [%c]\r", task_position_string[priv->tp],
487                                 work_indicator[count % 4]);
488                 count++;
489                 fflush(stdout);
490                 task_period_wait(priv->info);
491         }
492         return NULL;
493 }
494
/*
 * Companion of print_status_check(): emit a newline and flush stdout.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	(void)p;

	printf("\n");
	fflush(stdout);
	return 0;
}
502
503 static enum btrfs_check_mode parse_check_mode(const char *str)
504 {
505         if (strcmp(str, "lowmem") == 0)
506                 return CHECK_MODE_LOWMEM;
507         if (strcmp(str, "orig") == 0)
508                 return CHECK_MODE_ORIGINAL;
509         if (strcmp(str, "original") == 0)
510                 return CHECK_MODE_ORIGINAL;
511
512         return CHECK_MODE_UNKNOWN;
513 }
514
515 /* Compatible function to allow reuse of old codes */
516 static u64 first_extent_gap(struct rb_root *holes)
517 {
518         struct file_extent_hole *hole;
519
520         if (RB_EMPTY_ROOT(holes))
521                 return (u64)-1;
522
523         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
524         return hole->start;
525 }
526
527 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
528 {
529         struct file_extent_hole *hole1;
530         struct file_extent_hole *hole2;
531
532         hole1 = rb_entry(node1, struct file_extent_hole, node);
533         hole2 = rb_entry(node2, struct file_extent_hole, node);
534
535         if (hole1->start > hole2->start)
536                 return -1;
537         if (hole1->start < hole2->start)
538                 return 1;
539         /* Now hole1->start == hole2->start */
540         if (hole1->len >= hole2->len)
541                 /*
542                  * Hole 1 will be merge center
543                  * Same hole will be merged later
544                  */
545                 return -1;
546         /* Hole 2 will be merge center */
547         return 1;
548 }
549
550 /*
551  * Add a hole to the record
552  *
553  * This will do hole merge for copy_file_extent_holes(),
554  * which will ensure there won't be continuous holes.
555  */
556 static int add_file_extent_hole(struct rb_root *holes,
557                                 u64 start, u64 len)
558 {
559         struct file_extent_hole *hole;
560         struct file_extent_hole *prev = NULL;
561         struct file_extent_hole *next = NULL;
562
563         hole = malloc(sizeof(*hole));
564         if (!hole)
565                 return -ENOMEM;
566         hole->start = start;
567         hole->len = len;
568         /* Since compare will not return 0, no -EEXIST will happen */
569         rb_insert(holes, &hole->node, compare_hole);
570
571         /* simple merge with previous hole */
572         if (rb_prev(&hole->node))
573                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
574                                 node);
575         if (prev && prev->start + prev->len >= hole->start) {
576                 hole->len = hole->start + hole->len - prev->start;
577                 hole->start = prev->start;
578                 rb_erase(&prev->node, holes);
579                 free(prev);
580                 prev = NULL;
581         }
582
583         /* iterate merge with next holes */
584         while (1) {
585                 if (!rb_next(&hole->node))
586                         break;
587                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
588                                         node);
589                 if (hole->start + hole->len >= next->start) {
590                         if (hole->start + hole->len <= next->start + next->len)
591                                 hole->len = next->start + next->len -
592                                             hole->start;
593                         rb_erase(&next->node, holes);
594                         free(next);
595                         next = NULL;
596                 } else
597                         break;
598         }
599         return 0;
600 }
601
602 static int compare_hole_range(struct rb_node *node, void *data)
603 {
604         struct file_extent_hole *hole;
605         u64 start;
606
607         hole = (struct file_extent_hole *)data;
608         start = hole->start;
609
610         hole = rb_entry(node, struct file_extent_hole, node);
611         if (start < hole->start)
612                 return -1;
613         if (start >= hole->start && start < hole->start + hole->len)
614                 return 0;
615         return 1;
616 }
617
618 /*
619  * Delete a hole in the record
620  *
621  * This will do the hole split and is much restrict than add.
622  */
623 static int del_file_extent_hole(struct rb_root *holes,
624                                 u64 start, u64 len)
625 {
626         struct file_extent_hole *hole;
627         struct file_extent_hole tmp;
628         u64 prev_start = 0;
629         u64 prev_len = 0;
630         u64 next_start = 0;
631         u64 next_len = 0;
632         struct rb_node *node;
633         int have_prev = 0;
634         int have_next = 0;
635         int ret = 0;
636
637         tmp.start = start;
638         tmp.len = len;
639         node = rb_search(holes, &tmp, compare_hole_range, NULL);
640         if (!node)
641                 return -EEXIST;
642         hole = rb_entry(node, struct file_extent_hole, node);
643         if (start + len > hole->start + hole->len)
644                 return -EEXIST;
645
646         /*
647          * Now there will be no overlap, delete the hole and re-add the
648          * split(s) if they exists.
649          */
650         if (start > hole->start) {
651                 prev_start = hole->start;
652                 prev_len = start - hole->start;
653                 have_prev = 1;
654         }
655         if (hole->start + hole->len > start + len) {
656                 next_start = start + len;
657                 next_len = hole->start + hole->len - start - len;
658                 have_next = 1;
659         }
660         rb_erase(node, holes);
661         free(hole);
662         if (have_prev) {
663                 ret = add_file_extent_hole(holes, prev_start, prev_len);
664                 if (ret < 0)
665                         return ret;
666         }
667         if (have_next) {
668                 ret = add_file_extent_hole(holes, next_start, next_len);
669                 if (ret < 0)
670                         return ret;
671         }
672         return 0;
673 }
674
675 static int copy_file_extent_holes(struct rb_root *dst,
676                                   struct rb_root *src)
677 {
678         struct file_extent_hole *hole;
679         struct rb_node *node;
680         int ret = 0;
681
682         node = rb_first(src);
683         while (node) {
684                 hole = rb_entry(node, struct file_extent_hole, node);
685                 ret = add_file_extent_hole(dst, hole->start, hole->len);
686                 if (ret)
687                         break;
688                 node = rb_next(node);
689         }
690         return ret;
691 }
692
693 static void free_file_extent_holes(struct rb_root *holes)
694 {
695         struct rb_node *node;
696         struct file_extent_hole *hole;
697
698         node = rb_first(holes);
699         while (node) {
700                 hole = rb_entry(node, struct file_extent_hole, node);
701                 rb_erase(node, holes);
702                 free(hole);
703                 node = rb_first(holes);
704         }
705 }
706
707 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
708
709 static void record_root_in_trans(struct btrfs_trans_handle *trans,
710                                  struct btrfs_root *root)
711 {
712         if (root->last_trans != trans->transid) {
713                 root->track_dirty = 1;
714                 root->last_trans = trans->transid;
715                 root->commit_root = root->node;
716                 extent_buffer_get(root->node);
717         }
718 }
719
720 static u8 imode_to_type(u32 imode)
721 {
722 #define S_SHIFT 12
723         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
724                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
725                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
726                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
727                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
728                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
729                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
730                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
731         };
732
733         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
734 #undef S_SHIFT
735 }
736
737 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
738 {
739         struct device_record *rec1;
740         struct device_record *rec2;
741
742         rec1 = rb_entry(node1, struct device_record, node);
743         rec2 = rb_entry(node2, struct device_record, node);
744         if (rec1->devid > rec2->devid)
745                 return -1;
746         else if (rec1->devid < rec2->devid)
747                 return 1;
748         else
749                 return 0;
750 }
751
752 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
753 {
754         struct inode_record *rec;
755         struct inode_backref *backref;
756         struct inode_backref *orig;
757         struct inode_backref *tmp;
758         struct orphan_data_extent *src_orphan;
759         struct orphan_data_extent *dst_orphan;
760         size_t size;
761         int ret;
762
763         rec = malloc(sizeof(*rec));
764         if (!rec)
765                 return ERR_PTR(-ENOMEM);
766         memcpy(rec, orig_rec, sizeof(*rec));
767         rec->refs = 1;
768         INIT_LIST_HEAD(&rec->backrefs);
769         INIT_LIST_HEAD(&rec->orphan_extents);
770         rec->holes = RB_ROOT;
771
772         list_for_each_entry(orig, &orig_rec->backrefs, list) {
773                 size = sizeof(*orig) + orig->namelen + 1;
774                 backref = malloc(size);
775                 if (!backref) {
776                         ret = -ENOMEM;
777                         goto cleanup;
778                 }
779                 memcpy(backref, orig, size);
780                 list_add_tail(&backref->list, &rec->backrefs);
781         }
782         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
783                 dst_orphan = malloc(sizeof(*dst_orphan));
784                 if (!dst_orphan) {
785                         ret = -ENOMEM;
786                         goto cleanup;
787                 }
788                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
789                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
790         }
791         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
792         BUG_ON(ret < 0);
793
794         return rec;
795
796 cleanup:
797         if (!list_empty(&rec->backrefs))
798                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
799                         list_del(&orig->list);
800                         free(orig);
801                 }
802
803         if (!list_empty(&rec->orphan_extents))
804                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
805                         list_del(&orig->list);
806                         free(orig);
807                 }
808
809         free(rec);
810
811         return ERR_PTR(ret);
812 }
813
814 static void print_orphan_data_extents(struct list_head *orphan_extents,
815                                       u64 objectid)
816 {
817         struct orphan_data_extent *orphan;
818
819         if (list_empty(orphan_extents))
820                 return;
821         printf("The following data extent is lost in tree %llu:\n",
822                objectid);
823         list_for_each_entry(orphan, orphan_extents, list) {
824                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
825                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
826                        orphan->disk_len);
827         }
828 }
829
830 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
831 {
832         u64 root_objectid = root->root_key.objectid;
833         int errors = rec->errors;
834
835         if (!errors)
836                 return;
837         /* reloc root errors, we print its corresponding fs root objectid*/
838         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
839                 root_objectid = root->root_key.offset;
840                 fprintf(stderr, "reloc");
841         }
842         fprintf(stderr, "root %llu inode %llu errors %x",
843                 (unsigned long long) root_objectid,
844                 (unsigned long long) rec->ino, rec->errors);
845
846         if (errors & I_ERR_NO_INODE_ITEM)
847                 fprintf(stderr, ", no inode item");
848         if (errors & I_ERR_NO_ORPHAN_ITEM)
849                 fprintf(stderr, ", no orphan item");
850         if (errors & I_ERR_DUP_INODE_ITEM)
851                 fprintf(stderr, ", dup inode item");
852         if (errors & I_ERR_DUP_DIR_INDEX)
853                 fprintf(stderr, ", dup dir index");
854         if (errors & I_ERR_ODD_DIR_ITEM)
855                 fprintf(stderr, ", odd dir item");
856         if (errors & I_ERR_ODD_FILE_EXTENT)
857                 fprintf(stderr, ", odd file extent");
858         if (errors & I_ERR_BAD_FILE_EXTENT)
859                 fprintf(stderr, ", bad file extent");
860         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
861                 fprintf(stderr, ", file extent overlap");
862         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
863                 fprintf(stderr, ", file extent discount");
864         if (errors & I_ERR_DIR_ISIZE_WRONG)
865                 fprintf(stderr, ", dir isize wrong");
866         if (errors & I_ERR_FILE_NBYTES_WRONG)
867                 fprintf(stderr, ", nbytes wrong");
868         if (errors & I_ERR_ODD_CSUM_ITEM)
869                 fprintf(stderr, ", odd csum item");
870         if (errors & I_ERR_SOME_CSUM_MISSING)
871                 fprintf(stderr, ", some csum missing");
872         if (errors & I_ERR_LINK_COUNT_WRONG)
873                 fprintf(stderr, ", link count wrong");
874         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
875                 fprintf(stderr, ", orphan file extent");
876         fprintf(stderr, "\n");
877         /* Print the orphan extents if needed */
878         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
879                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
880
881         /* Print the holes if needed */
882         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
883                 struct file_extent_hole *hole;
884                 struct rb_node *node;
885                 int found = 0;
886
887                 node = rb_first(&rec->holes);
888                 fprintf(stderr, "Found file extent holes:\n");
889                 while (node) {
890                         found = 1;
891                         hole = rb_entry(node, struct file_extent_hole, node);
892                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
893                                 hole->start, hole->len);
894                         node = rb_next(node);
895                 }
896                 if (!found)
897                         fprintf(stderr, "\tstart: 0, len: %llu\n",
898                                 round_up(rec->isize, root->sectorsize));
899         }
900 }
901
902 static void print_ref_error(int errors)
903 {
904         if (errors & REF_ERR_NO_DIR_ITEM)
905                 fprintf(stderr, ", no dir item");
906         if (errors & REF_ERR_NO_DIR_INDEX)
907                 fprintf(stderr, ", no dir index");
908         if (errors & REF_ERR_NO_INODE_REF)
909                 fprintf(stderr, ", no inode ref");
910         if (errors & REF_ERR_DUP_DIR_ITEM)
911                 fprintf(stderr, ", dup dir item");
912         if (errors & REF_ERR_DUP_DIR_INDEX)
913                 fprintf(stderr, ", dup dir index");
914         if (errors & REF_ERR_DUP_INODE_REF)
915                 fprintf(stderr, ", dup inode ref");
916         if (errors & REF_ERR_INDEX_UNMATCH)
917                 fprintf(stderr, ", index mismatch");
918         if (errors & REF_ERR_FILETYPE_UNMATCH)
919                 fprintf(stderr, ", filetype mismatch");
920         if (errors & REF_ERR_NAME_TOO_LONG)
921                 fprintf(stderr, ", name too long");
922         if (errors & REF_ERR_NO_ROOT_REF)
923                 fprintf(stderr, ", no root ref");
924         if (errors & REF_ERR_NO_ROOT_BACKREF)
925                 fprintf(stderr, ", no root backref");
926         if (errors & REF_ERR_DUP_ROOT_REF)
927                 fprintf(stderr, ", dup root ref");
928         if (errors & REF_ERR_DUP_ROOT_BACKREF)
929                 fprintf(stderr, ", dup root backref");
930         fprintf(stderr, "\n");
931 }
932
933 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
934                                           u64 ino, int mod)
935 {
936         struct ptr_node *node;
937         struct cache_extent *cache;
938         struct inode_record *rec = NULL;
939         int ret;
940
941         cache = lookup_cache_extent(inode_cache, ino, 1);
942         if (cache) {
943                 node = container_of(cache, struct ptr_node, cache);
944                 rec = node->data;
945                 if (mod && rec->refs > 1) {
946                         node->data = clone_inode_rec(rec);
947                         if (IS_ERR(node->data))
948                                 return node->data;
949                         rec->refs--;
950                         rec = node->data;
951                 }
952         } else if (mod) {
953                 rec = calloc(1, sizeof(*rec));
954                 if (!rec)
955                         return ERR_PTR(-ENOMEM);
956                 rec->ino = ino;
957                 rec->extent_start = (u64)-1;
958                 rec->refs = 1;
959                 INIT_LIST_HEAD(&rec->backrefs);
960                 INIT_LIST_HEAD(&rec->orphan_extents);
961                 rec->holes = RB_ROOT;
962
963                 node = malloc(sizeof(*node));
964                 if (!node) {
965                         free(rec);
966                         return ERR_PTR(-ENOMEM);
967                 }
968                 node->cache.start = ino;
969                 node->cache.size = 1;
970                 node->data = rec;
971
972                 if (ino == BTRFS_FREE_INO_OBJECTID)
973                         rec->found_link = 1;
974
975                 ret = insert_cache_extent(inode_cache, &node->cache);
976                 if (ret)
977                         return ERR_PTR(-EEXIST);
978         }
979         return rec;
980 }
981
982 static void free_orphan_data_extents(struct list_head *orphan_extents)
983 {
984         struct orphan_data_extent *orphan;
985
986         while (!list_empty(orphan_extents)) {
987                 orphan = list_entry(orphan_extents->next,
988                                     struct orphan_data_extent, list);
989                 list_del(&orphan->list);
990                 free(orphan);
991         }
992 }
993
994 static void free_inode_rec(struct inode_record *rec)
995 {
996         struct inode_backref *backref;
997
998         if (--rec->refs > 0)
999                 return;
1000
1001         while (!list_empty(&rec->backrefs)) {
1002                 backref = to_inode_backref(rec->backrefs.next);
1003                 list_del(&backref->list);
1004                 free(backref);
1005         }
1006         free_orphan_data_extents(&rec->orphan_extents);
1007         free_file_extent_holes(&rec->holes);
1008         free(rec);
1009 }
1010
1011 static int can_free_inode_rec(struct inode_record *rec)
1012 {
1013         if (!rec->errors && rec->checked && rec->found_inode_item &&
1014             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1015                 return 1;
1016         return 0;
1017 }
1018
1019 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1020                                  struct inode_record *rec)
1021 {
1022         struct cache_extent *cache;
1023         struct inode_backref *tmp, *backref;
1024         struct ptr_node *node;
1025         unsigned char filetype;
1026
1027         if (!rec->found_inode_item)
1028                 return;
1029
1030         filetype = imode_to_type(rec->imode);
1031         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1032                 if (backref->found_dir_item && backref->found_dir_index) {
1033                         if (backref->filetype != filetype)
1034                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1035                         if (!backref->errors && backref->found_inode_ref &&
1036                             rec->nlink == rec->found_link) {
1037                                 list_del(&backref->list);
1038                                 free(backref);
1039                         }
1040                 }
1041         }
1042
1043         if (!rec->checked || rec->merging)
1044                 return;
1045
1046         if (S_ISDIR(rec->imode)) {
1047                 if (rec->found_size != rec->isize)
1048                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1049                 if (rec->found_file_extent)
1050                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1051         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1052                 if (rec->found_dir_item)
1053                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1054                 if (rec->found_size != rec->nbytes)
1055                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1056                 if (rec->nlink > 0 && !no_holes &&
1057                     (rec->extent_end < rec->isize ||
1058                      first_extent_gap(&rec->holes) < rec->isize))
1059                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1060         }
1061
1062         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1063                 if (rec->found_csum_item && rec->nodatasum)
1064                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1065                 if (rec->some_csum_missing && !rec->nodatasum)
1066                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1067         }
1068
1069         BUG_ON(rec->refs != 1);
1070         if (can_free_inode_rec(rec)) {
1071                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1072                 node = container_of(cache, struct ptr_node, cache);
1073                 BUG_ON(node->data != rec);
1074                 remove_cache_extent(inode_cache, &node->cache);
1075                 free(node);
1076                 free_inode_rec(rec);
1077         }
1078 }
1079
1080 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1081 {
1082         struct btrfs_path path;
1083         struct btrfs_key key;
1084         int ret;
1085
1086         key.objectid = BTRFS_ORPHAN_OBJECTID;
1087         key.type = BTRFS_ORPHAN_ITEM_KEY;
1088         key.offset = ino;
1089
1090         btrfs_init_path(&path);
1091         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1092         btrfs_release_path(&path);
1093         if (ret > 0)
1094                 ret = -ENOENT;
1095         return ret;
1096 }
1097
1098 static int process_inode_item(struct extent_buffer *eb,
1099                               int slot, struct btrfs_key *key,
1100                               struct shared_node *active_node)
1101 {
1102         struct inode_record *rec;
1103         struct btrfs_inode_item *item;
1104
1105         rec = active_node->current;
1106         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1107         if (rec->found_inode_item) {
1108                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1109                 return 1;
1110         }
1111         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1112         rec->nlink = btrfs_inode_nlink(eb, item);
1113         rec->isize = btrfs_inode_size(eb, item);
1114         rec->nbytes = btrfs_inode_nbytes(eb, item);
1115         rec->imode = btrfs_inode_mode(eb, item);
1116         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1117                 rec->nodatasum = 1;
1118         rec->found_inode_item = 1;
1119         if (rec->nlink == 0)
1120                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1121         maybe_free_inode_rec(&active_node->inode_cache, rec);
1122         return 0;
1123 }
1124
1125 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1126                                                 const char *name,
1127                                                 int namelen, u64 dir)
1128 {
1129         struct inode_backref *backref;
1130
1131         list_for_each_entry(backref, &rec->backrefs, list) {
1132                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1133                         break;
1134                 if (backref->dir != dir || backref->namelen != namelen)
1135                         continue;
1136                 if (memcmp(name, backref->name, namelen))
1137                         continue;
1138                 return backref;
1139         }
1140
1141         backref = malloc(sizeof(*backref) + namelen + 1);
1142         if (!backref)
1143                 return NULL;
1144         memset(backref, 0, sizeof(*backref));
1145         backref->dir = dir;
1146         backref->namelen = namelen;
1147         memcpy(backref->name, name, namelen);
1148         backref->name[namelen] = '\0';
1149         list_add_tail(&backref->list, &rec->backrefs);
1150         return backref;
1151 }
1152
1153 static int add_inode_backref(struct cache_tree *inode_cache,
1154                              u64 ino, u64 dir, u64 index,
1155                              const char *name, int namelen,
1156                              int filetype, int itemtype, int errors)
1157 {
1158         struct inode_record *rec;
1159         struct inode_backref *backref;
1160
1161         rec = get_inode_rec(inode_cache, ino, 1);
1162         BUG_ON(IS_ERR(rec));
1163         backref = get_inode_backref(rec, name, namelen, dir);
1164         BUG_ON(!backref);
1165         if (errors)
1166                 backref->errors |= errors;
1167         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1168                 if (backref->found_dir_index)
1169                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1170                 if (backref->found_inode_ref && backref->index != index)
1171                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1172                 if (backref->found_dir_item && backref->filetype != filetype)
1173                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1174
1175                 backref->index = index;
1176                 backref->filetype = filetype;
1177                 backref->found_dir_index = 1;
1178         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1179                 rec->found_link++;
1180                 if (backref->found_dir_item)
1181                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1182                 if (backref->found_dir_index && backref->filetype != filetype)
1183                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1184
1185                 backref->filetype = filetype;
1186                 backref->found_dir_item = 1;
1187         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1188                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1189                 if (backref->found_inode_ref)
1190                         backref->errors |= REF_ERR_DUP_INODE_REF;
1191                 if (backref->found_dir_index && backref->index != index)
1192                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1193                 else
1194                         backref->index = index;
1195
1196                 backref->ref_type = itemtype;
1197                 backref->found_inode_ref = 1;
1198         } else {
1199                 BUG_ON(1);
1200         }
1201
1202         maybe_free_inode_rec(inode_cache, rec);
1203         return 0;
1204 }
1205
1206 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1207                             struct cache_tree *dst_cache)
1208 {
1209         struct inode_backref *backref;
1210         u32 dir_count = 0;
1211         int ret = 0;
1212
1213         dst->merging = 1;
1214         list_for_each_entry(backref, &src->backrefs, list) {
1215                 if (backref->found_dir_index) {
1216                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1217                                         backref->index, backref->name,
1218                                         backref->namelen, backref->filetype,
1219                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1220                 }
1221                 if (backref->found_dir_item) {
1222                         dir_count++;
1223                         add_inode_backref(dst_cache, dst->ino,
1224                                         backref->dir, 0, backref->name,
1225                                         backref->namelen, backref->filetype,
1226                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1227                 }
1228                 if (backref->found_inode_ref) {
1229                         add_inode_backref(dst_cache, dst->ino,
1230                                         backref->dir, backref->index,
1231                                         backref->name, backref->namelen, 0,
1232                                         backref->ref_type, backref->errors);
1233                 }
1234         }
1235
1236         if (src->found_dir_item)
1237                 dst->found_dir_item = 1;
1238         if (src->found_file_extent)
1239                 dst->found_file_extent = 1;
1240         if (src->found_csum_item)
1241                 dst->found_csum_item = 1;
1242         if (src->some_csum_missing)
1243                 dst->some_csum_missing = 1;
1244         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1245                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1246                 if (ret < 0)
1247                         return ret;
1248         }
1249
1250         BUG_ON(src->found_link < dir_count);
1251         dst->found_link += src->found_link - dir_count;
1252         dst->found_size += src->found_size;
1253         if (src->extent_start != (u64)-1) {
1254                 if (dst->extent_start == (u64)-1) {
1255                         dst->extent_start = src->extent_start;
1256                         dst->extent_end = src->extent_end;
1257                 } else {
1258                         if (dst->extent_end > src->extent_start)
1259                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1260                         else if (dst->extent_end < src->extent_start) {
1261                                 ret = add_file_extent_hole(&dst->holes,
1262                                         dst->extent_end,
1263                                         src->extent_start - dst->extent_end);
1264                         }
1265                         if (dst->extent_end < src->extent_end)
1266                                 dst->extent_end = src->extent_end;
1267                 }
1268         }
1269
1270         dst->errors |= src->errors;
1271         if (src->found_inode_item) {
1272                 if (!dst->found_inode_item) {
1273                         dst->nlink = src->nlink;
1274                         dst->isize = src->isize;
1275                         dst->nbytes = src->nbytes;
1276                         dst->imode = src->imode;
1277                         dst->nodatasum = src->nodatasum;
1278                         dst->found_inode_item = 1;
1279                 } else {
1280                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1281                 }
1282         }
1283         dst->merging = 0;
1284
1285         return 0;
1286 }
1287
1288 static int splice_shared_node(struct shared_node *src_node,
1289                               struct shared_node *dst_node)
1290 {
1291         struct cache_extent *cache;
1292         struct ptr_node *node, *ins;
1293         struct cache_tree *src, *dst;
1294         struct inode_record *rec, *conflict;
1295         u64 current_ino = 0;
1296         int splice = 0;
1297         int ret;
1298
1299         if (--src_node->refs == 0)
1300                 splice = 1;
1301         if (src_node->current)
1302                 current_ino = src_node->current->ino;
1303
1304         src = &src_node->root_cache;
1305         dst = &dst_node->root_cache;
1306 again:
1307         cache = search_cache_extent(src, 0);
1308         while (cache) {
1309                 node = container_of(cache, struct ptr_node, cache);
1310                 rec = node->data;
1311                 cache = next_cache_extent(cache);
1312
1313                 if (splice) {
1314                         remove_cache_extent(src, &node->cache);
1315                         ins = node;
1316                 } else {
1317                         ins = malloc(sizeof(*ins));
1318                         BUG_ON(!ins);
1319                         ins->cache.start = node->cache.start;
1320                         ins->cache.size = node->cache.size;
1321                         ins->data = rec;
1322                         rec->refs++;
1323                 }
1324                 ret = insert_cache_extent(dst, &ins->cache);
1325                 if (ret == -EEXIST) {
1326                         conflict = get_inode_rec(dst, rec->ino, 1);
1327                         BUG_ON(IS_ERR(conflict));
1328                         merge_inode_recs(rec, conflict, dst);
1329                         if (rec->checked) {
1330                                 conflict->checked = 1;
1331                                 if (dst_node->current == conflict)
1332                                         dst_node->current = NULL;
1333                         }
1334                         maybe_free_inode_rec(dst, conflict);
1335                         free_inode_rec(rec);
1336                         free(ins);
1337                 } else {
1338                         BUG_ON(ret);
1339                 }
1340         }
1341
1342         if (src == &src_node->root_cache) {
1343                 src = &src_node->inode_cache;
1344                 dst = &dst_node->inode_cache;
1345                 goto again;
1346         }
1347
1348         if (current_ino > 0 && (!dst_node->current ||
1349             current_ino > dst_node->current->ino)) {
1350                 if (dst_node->current) {
1351                         dst_node->current->checked = 1;
1352                         maybe_free_inode_rec(dst, dst_node->current);
1353                 }
1354                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1355                 BUG_ON(IS_ERR(dst_node->current));
1356         }
1357         return 0;
1358 }
1359
1360 static void free_inode_ptr(struct cache_extent *cache)
1361 {
1362         struct ptr_node *node;
1363         struct inode_record *rec;
1364
1365         node = container_of(cache, struct ptr_node, cache);
1366         rec = node->data;
1367         free_inode_rec(rec);
1368         free(node);
1369 }
1370
1371 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1372
1373 static struct shared_node *find_shared_node(struct cache_tree *shared,
1374                                             u64 bytenr)
1375 {
1376         struct cache_extent *cache;
1377         struct shared_node *node;
1378
1379         cache = lookup_cache_extent(shared, bytenr, 1);
1380         if (cache) {
1381                 node = container_of(cache, struct shared_node, cache);
1382                 return node;
1383         }
1384         return NULL;
1385 }
1386
1387 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1388 {
1389         int ret;
1390         struct shared_node *node;
1391
1392         node = calloc(1, sizeof(*node));
1393         if (!node)
1394                 return -ENOMEM;
1395         node->cache.start = bytenr;
1396         node->cache.size = 1;
1397         cache_tree_init(&node->root_cache);
1398         cache_tree_init(&node->inode_cache);
1399         node->refs = refs;
1400
1401         ret = insert_cache_extent(shared, &node->cache);
1402
1403         return ret;
1404 }
1405
1406 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1407                              struct walk_control *wc, int level)
1408 {
1409         struct shared_node *node;
1410         struct shared_node *dest;
1411         int ret;
1412
1413         if (level == wc->active_node)
1414                 return 0;
1415
1416         BUG_ON(wc->active_node <= level);
1417         node = find_shared_node(&wc->shared, bytenr);
1418         if (!node) {
1419                 ret = add_shared_node(&wc->shared, bytenr, refs);
1420                 BUG_ON(ret);
1421                 node = find_shared_node(&wc->shared, bytenr);
1422                 wc->nodes[level] = node;
1423                 wc->active_node = level;
1424                 return 0;
1425         }
1426
1427         if (wc->root_level == wc->active_node &&
1428             btrfs_root_refs(&root->root_item) == 0) {
1429                 if (--node->refs == 0) {
1430                         free_inode_recs_tree(&node->root_cache);
1431                         free_inode_recs_tree(&node->inode_cache);
1432                         remove_cache_extent(&wc->shared, &node->cache);
1433                         free(node);
1434                 }
1435                 return 1;
1436         }
1437
1438         dest = wc->nodes[wc->active_node];
1439         splice_shared_node(node, dest);
1440         if (node->refs == 0) {
1441                 remove_cache_extent(&wc->shared, &node->cache);
1442                 free(node);
1443         }
1444         return 1;
1445 }
1446
1447 static int leave_shared_node(struct btrfs_root *root,
1448                              struct walk_control *wc, int level)
1449 {
1450         struct shared_node *node;
1451         struct shared_node *dest;
1452         int i;
1453
1454         if (level == wc->root_level)
1455                 return 0;
1456
1457         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1458                 if (wc->nodes[i])
1459                         break;
1460         }
1461         BUG_ON(i >= BTRFS_MAX_LEVEL);
1462
1463         node = wc->nodes[wc->active_node];
1464         wc->nodes[wc->active_node] = NULL;
1465         wc->active_node = i;
1466
1467         dest = wc->nodes[wc->active_node];
1468         if (wc->active_node < wc->root_level ||
1469             btrfs_root_refs(&root->root_item) > 0) {
1470                 BUG_ON(node->refs <= 1);
1471                 splice_shared_node(node, dest);
1472         } else {
1473                 BUG_ON(node->refs < 2);
1474                 node->refs--;
1475         }
1476         return 0;
1477 }
1478
1479 /*
1480  * Returns:
1481  * < 0 - on error
1482  * 1   - if the root with id child_root_id is a child of root parent_root_id
1483  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1484  *       has other root(s) as parent(s)
1485  * 2   - if the root child_root_id doesn't have any parent roots
1486  */
1487 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1488                          u64 child_root_id)
1489 {
1490         struct btrfs_path path;
1491         struct btrfs_key key;
1492         struct extent_buffer *leaf;
1493         int has_parent = 0;
1494         int ret;
1495
1496         btrfs_init_path(&path);
1497
1498         key.objectid = parent_root_id;
1499         key.type = BTRFS_ROOT_REF_KEY;
1500         key.offset = child_root_id;
1501         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1502                                 0, 0);
1503         if (ret < 0)
1504                 return ret;
1505         btrfs_release_path(&path);
1506         if (!ret)
1507                 return 1;
1508
1509         key.objectid = child_root_id;
1510         key.type = BTRFS_ROOT_BACKREF_KEY;
1511         key.offset = 0;
1512         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1513                                 0, 0);
1514         if (ret < 0)
1515                 goto out;
1516
1517         while (1) {
1518                 leaf = path.nodes[0];
1519                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1520                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1521                         if (ret)
1522                                 break;
1523                         leaf = path.nodes[0];
1524                 }
1525
1526                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1527                 if (key.objectid != child_root_id ||
1528                     key.type != BTRFS_ROOT_BACKREF_KEY)
1529                         break;
1530
1531                 has_parent = 1;
1532
1533                 if (key.offset == parent_root_id) {
1534                         btrfs_release_path(&path);
1535                         return 1;
1536                 }
1537
1538                 path.slots[0]++;
1539         }
1540 out:
1541         btrfs_release_path(&path);
1542         if (ret < 0)
1543                 return ret;
1544         return has_parent ? 0 : 2;
1545 }
1546
1547 static int process_dir_item(struct btrfs_root *root,
1548                             struct extent_buffer *eb,
1549                             int slot, struct btrfs_key *key,
1550                             struct shared_node *active_node)
1551 {
1552         u32 total;
1553         u32 cur = 0;
1554         u32 len;
1555         u32 name_len;
1556         u32 data_len;
1557         int error;
1558         int nritems = 0;
1559         int filetype;
1560         struct btrfs_dir_item *di;
1561         struct inode_record *rec;
1562         struct cache_tree *root_cache;
1563         struct cache_tree *inode_cache;
1564         struct btrfs_key location;
1565         char namebuf[BTRFS_NAME_LEN];
1566
1567         root_cache = &active_node->root_cache;
1568         inode_cache = &active_node->inode_cache;
1569         rec = active_node->current;
1570         rec->found_dir_item = 1;
1571
1572         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1573         total = btrfs_item_size_nr(eb, slot);
1574         while (cur < total) {
1575                 nritems++;
1576                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1577                 name_len = btrfs_dir_name_len(eb, di);
1578                 data_len = btrfs_dir_data_len(eb, di);
1579                 filetype = btrfs_dir_type(eb, di);
1580
1581                 rec->found_size += name_len;
1582                 if (name_len <= BTRFS_NAME_LEN) {
1583                         len = name_len;
1584                         error = 0;
1585                 } else {
1586                         len = BTRFS_NAME_LEN;
1587                         error = REF_ERR_NAME_TOO_LONG;
1588                 }
1589                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1590
1591                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1592                         add_inode_backref(inode_cache, location.objectid,
1593                                           key->objectid, key->offset, namebuf,
1594                                           len, filetype, key->type, error);
1595                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1596                         add_inode_backref(root_cache, location.objectid,
1597                                           key->objectid, key->offset,
1598                                           namebuf, len, filetype,
1599                                           key->type, error);
1600                 } else {
1601                         fprintf(stderr, "invalid location in dir item %u\n",
1602                                 location.type);
1603                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1604                                           key->objectid, key->offset, namebuf,
1605                                           len, filetype, key->type, error);
1606                 }
1607
1608                 len = sizeof(*di) + name_len + data_len;
1609                 di = (struct btrfs_dir_item *)((char *)di + len);
1610                 cur += len;
1611         }
1612         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1613                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1614
1615         return 0;
1616 }
1617
1618 static int process_inode_ref(struct extent_buffer *eb,
1619                              int slot, struct btrfs_key *key,
1620                              struct shared_node *active_node)
1621 {
1622         u32 total;
1623         u32 cur = 0;
1624         u32 len;
1625         u32 name_len;
1626         u64 index;
1627         int error;
1628         struct cache_tree *inode_cache;
1629         struct btrfs_inode_ref *ref;
1630         char namebuf[BTRFS_NAME_LEN];
1631
1632         inode_cache = &active_node->inode_cache;
1633
1634         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1635         total = btrfs_item_size_nr(eb, slot);
1636         while (cur < total) {
1637                 name_len = btrfs_inode_ref_name_len(eb, ref);
1638                 index = btrfs_inode_ref_index(eb, ref);
1639                 if (name_len <= BTRFS_NAME_LEN) {
1640                         len = name_len;
1641                         error = 0;
1642                 } else {
1643                         len = BTRFS_NAME_LEN;
1644                         error = REF_ERR_NAME_TOO_LONG;
1645                 }
1646                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1647                 add_inode_backref(inode_cache, key->objectid, key->offset,
1648                                   index, namebuf, len, 0, key->type, error);
1649
1650                 len = sizeof(*ref) + name_len;
1651                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1652                 cur += len;
1653         }
1654         return 0;
1655 }
1656
1657 static int process_inode_extref(struct extent_buffer *eb,
1658                                 int slot, struct btrfs_key *key,
1659                                 struct shared_node *active_node)
1660 {
1661         u32 total;
1662         u32 cur = 0;
1663         u32 len;
1664         u32 name_len;
1665         u64 index;
1666         u64 parent;
1667         int error;
1668         struct cache_tree *inode_cache;
1669         struct btrfs_inode_extref *extref;
1670         char namebuf[BTRFS_NAME_LEN];
1671
1672         inode_cache = &active_node->inode_cache;
1673
1674         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1675         total = btrfs_item_size_nr(eb, slot);
1676         while (cur < total) {
1677                 name_len = btrfs_inode_extref_name_len(eb, extref);
1678                 index = btrfs_inode_extref_index(eb, extref);
1679                 parent = btrfs_inode_extref_parent(eb, extref);
1680                 if (name_len <= BTRFS_NAME_LEN) {
1681                         len = name_len;
1682                         error = 0;
1683                 } else {
1684                         len = BTRFS_NAME_LEN;
1685                         error = REF_ERR_NAME_TOO_LONG;
1686                 }
1687                 read_extent_buffer(eb, namebuf,
1688                                    (unsigned long)(extref + 1), len);
1689                 add_inode_backref(inode_cache, key->objectid, parent,
1690                                   index, namebuf, len, 0, key->type, error);
1691
1692                 len = sizeof(*extref) + name_len;
1693                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1694                 cur += len;
1695         }
1696         return 0;
1697
1698 }
1699
1700 static int count_csum_range(struct btrfs_root *root, u64 start,
1701                             u64 len, u64 *found)
1702 {
1703         struct btrfs_key key;
1704         struct btrfs_path path;
1705         struct extent_buffer *leaf;
1706         int ret;
1707         size_t size;
1708         *found = 0;
1709         u64 csum_end;
1710         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1711
1712         btrfs_init_path(&path);
1713
1714         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1715         key.offset = start;
1716         key.type = BTRFS_EXTENT_CSUM_KEY;
1717
1718         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1719                                 &key, &path, 0, 0);
1720         if (ret < 0)
1721                 goto out;
1722         if (ret > 0 && path.slots[0] > 0) {
1723                 leaf = path.nodes[0];
1724                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1725                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1726                     key.type == BTRFS_EXTENT_CSUM_KEY)
1727                         path.slots[0]--;
1728         }
1729
1730         while (len > 0) {
1731                 leaf = path.nodes[0];
1732                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1733                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1734                         if (ret > 0)
1735                                 break;
1736                         else if (ret < 0)
1737                                 goto out;
1738                         leaf = path.nodes[0];
1739                 }
1740
1741                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1742                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1743                     key.type != BTRFS_EXTENT_CSUM_KEY)
1744                         break;
1745
1746                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1747                 if (key.offset >= start + len)
1748                         break;
1749
1750                 if (key.offset > start)
1751                         start = key.offset;
1752
1753                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1754                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1755                 if (csum_end > start) {
1756                         size = min(csum_end - start, len);
1757                         len -= size;
1758                         start += size;
1759                         *found += size;
1760                 }
1761
1762                 path.slots[0]++;
1763         }
1764 out:
1765         btrfs_release_path(&path);
1766         if (ret < 0)
1767                 return ret;
1768         return 0;
1769 }
1770
1771 static int process_file_extent(struct btrfs_root *root,
1772                                 struct extent_buffer *eb,
1773                                 int slot, struct btrfs_key *key,
1774                                 struct shared_node *active_node)
1775 {
1776         struct inode_record *rec;
1777         struct btrfs_file_extent_item *fi;
1778         u64 num_bytes = 0;
1779         u64 disk_bytenr = 0;
1780         u64 extent_offset = 0;
1781         u64 mask = root->sectorsize - 1;
1782         int extent_type;
1783         int ret;
1784
1785         rec = active_node->current;
1786         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1787         rec->found_file_extent = 1;
1788
1789         if (rec->extent_start == (u64)-1) {
1790                 rec->extent_start = key->offset;
1791                 rec->extent_end = key->offset;
1792         }
1793
1794         if (rec->extent_end > key->offset)
1795                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1796         else if (rec->extent_end < key->offset) {
1797                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1798                                            key->offset - rec->extent_end);
1799                 if (ret < 0)
1800                         return ret;
1801         }
1802
1803         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1804         extent_type = btrfs_file_extent_type(eb, fi);
1805
1806         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1807                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1808                 if (num_bytes == 0)
1809                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1810                 rec->found_size += num_bytes;
1811                 num_bytes = (num_bytes + mask) & ~mask;
1812         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1813                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1814                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1815                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1816                 extent_offset = btrfs_file_extent_offset(eb, fi);
1817                 if (num_bytes == 0 || (num_bytes & mask))
1818                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1819                 if (num_bytes + extent_offset >
1820                     btrfs_file_extent_ram_bytes(eb, fi))
1821                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1822                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1823                     (btrfs_file_extent_compression(eb, fi) ||
1824                      btrfs_file_extent_encryption(eb, fi) ||
1825                      btrfs_file_extent_other_encoding(eb, fi)))
1826                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1827                 if (disk_bytenr > 0)
1828                         rec->found_size += num_bytes;
1829         } else {
1830                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1831         }
1832         rec->extent_end = key->offset + num_bytes;
1833
1834         /*
1835          * The data reloc tree will copy full extents into its inode and then
1836          * copy the corresponding csums.  Because the extent it copied could be
1837          * a preallocated extent that hasn't been written to yet there may be no
1838          * csums to copy, ergo we won't have csums for our file extent.  This is
1839          * ok so just don't bother checking csums if the inode belongs to the
1840          * data reloc tree.
1841          */
1842         if (disk_bytenr > 0 &&
1843             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1844                 u64 found;
1845                 if (btrfs_file_extent_compression(eb, fi))
1846                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1847                 else
1848                         disk_bytenr += extent_offset;
1849
1850                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1851                 if (ret < 0)
1852                         return ret;
1853                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1854                         if (found > 0)
1855                                 rec->found_csum_item = 1;
1856                         if (found < num_bytes)
1857                                 rec->some_csum_missing = 1;
1858                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1859                         if (found > 0)
1860                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1861                 }
1862         }
1863         return 0;
1864 }
1865
1866 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1867                             struct walk_control *wc)
1868 {
1869         struct btrfs_key key;
1870         u32 nritems;
1871         int i;
1872         int ret = 0;
1873         struct cache_tree *inode_cache;
1874         struct shared_node *active_node;
1875
1876         if (wc->root_level == wc->active_node &&
1877             btrfs_root_refs(&root->root_item) == 0)
1878                 return 0;
1879
1880         active_node = wc->nodes[wc->active_node];
1881         inode_cache = &active_node->inode_cache;
1882         nritems = btrfs_header_nritems(eb);
1883         for (i = 0; i < nritems; i++) {
1884                 btrfs_item_key_to_cpu(eb, &key, i);
1885
1886                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1887                         continue;
1888                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1889                         continue;
1890
1891                 if (active_node->current == NULL ||
1892                     active_node->current->ino < key.objectid) {
1893                         if (active_node->current) {
1894                                 active_node->current->checked = 1;
1895                                 maybe_free_inode_rec(inode_cache,
1896                                                      active_node->current);
1897                         }
1898                         active_node->current = get_inode_rec(inode_cache,
1899                                                              key.objectid, 1);
1900                         BUG_ON(IS_ERR(active_node->current));
1901                 }
1902                 switch (key.type) {
1903                 case BTRFS_DIR_ITEM_KEY:
1904                 case BTRFS_DIR_INDEX_KEY:
1905                         ret = process_dir_item(root, eb, i, &key, active_node);
1906                         break;
1907                 case BTRFS_INODE_REF_KEY:
1908                         ret = process_inode_ref(eb, i, &key, active_node);
1909                         break;
1910                 case BTRFS_INODE_EXTREF_KEY:
1911                         ret = process_inode_extref(eb, i, &key, active_node);
1912                         break;
1913                 case BTRFS_INODE_ITEM_KEY:
1914                         ret = process_inode_item(eb, i, &key, active_node);
1915                         break;
1916                 case BTRFS_EXTENT_DATA_KEY:
1917                         ret = process_file_extent(root, eb, i, &key,
1918                                                   active_node);
1919                         break;
1920                 default:
1921                         break;
1922                 };
1923         }
1924         return ret;
1925 }
1926
1927 static void reada_walk_down(struct btrfs_root *root,
1928                             struct extent_buffer *node, int slot)
1929 {
1930         u64 bytenr;
1931         u64 ptr_gen;
1932         u32 nritems;
1933         u32 blocksize;
1934         int i;
1935         int level;
1936
1937         level = btrfs_header_level(node);
1938         if (level != 1)
1939                 return;
1940
1941         nritems = btrfs_header_nritems(node);
1942         blocksize = root->nodesize;
1943         for (i = slot; i < nritems; i++) {
1944                 bytenr = btrfs_node_blockptr(node, i);
1945                 ptr_gen = btrfs_node_ptr_generation(node, i);
1946                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1947         }
1948 }
1949
1950 /*
1951  * Check the child node/leaf by the following condition:
1952  * 1. the first item key of the node/leaf should be the same with the one
1953  *    in parent.
1954  * 2. block in parent node should match the child node/leaf.
1955  * 3. generation of parent node and child's header should be consistent.
1956  *
1957  * Or the child node/leaf pointed by the key in parent is not valid.
1958  *
1959  * We hope to check leaf owner too, but since subvol may share leaves,
1960  * which makes leaf owner check not so strong, key check should be
1961  * sufficient enough for that case.
1962  */
1963 static int check_child_node(struct btrfs_root *root,
1964                             struct extent_buffer *parent, int slot,
1965                             struct extent_buffer *child)
1966 {
1967         struct btrfs_key parent_key;
1968         struct btrfs_key child_key;
1969         int ret = 0;
1970
1971         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1972         if (btrfs_header_level(child) == 0)
1973                 btrfs_item_key_to_cpu(child, &child_key, 0);
1974         else
1975                 btrfs_node_key_to_cpu(child, &child_key, 0);
1976
1977         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1978                 ret = -EINVAL;
1979                 fprintf(stderr,
1980                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1981                         parent_key.objectid, parent_key.type, parent_key.offset,
1982                         child_key.objectid, child_key.type, child_key.offset);
1983         }
1984         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1985                 ret = -EINVAL;
1986                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1987                         btrfs_node_blockptr(parent, slot),
1988                         btrfs_header_bytenr(child));
1989         }
1990         if (btrfs_node_ptr_generation(parent, slot) !=
1991             btrfs_header_generation(child)) {
1992                 ret = -EINVAL;
1993                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1994                         btrfs_header_generation(child),
1995                         btrfs_node_ptr_generation(parent, slot));
1996         }
1997         return ret;
1998 }
1999
/*
 * Per-level cache of the last extent reference count looked up during a tree
 * walk: bytenr[level] is the start of the block that was looked up and
 * refs[level] is the reference count found for it.
 */
struct node_refs {
        u64 bytenr[BTRFS_MAX_LEVEL];
        u64 refs[BTRFS_MAX_LEVEL];
};
2004
2005 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2006                           struct walk_control *wc, int *level,
2007                           struct node_refs *nrefs)
2008 {
2009         enum btrfs_tree_block_status status;
2010         u64 bytenr;
2011         u64 ptr_gen;
2012         struct extent_buffer *next;
2013         struct extent_buffer *cur;
2014         u32 blocksize;
2015         int ret, err = 0;
2016         u64 refs;
2017
2018         WARN_ON(*level < 0);
2019         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2020
2021         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2022                 refs = nrefs->refs[*level];
2023                 ret = 0;
2024         } else {
2025                 ret = btrfs_lookup_extent_info(NULL, root,
2026                                        path->nodes[*level]->start,
2027                                        *level, 1, &refs, NULL);
2028                 if (ret < 0) {
2029                         err = ret;
2030                         goto out;
2031                 }
2032                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2033                 nrefs->refs[*level] = refs;
2034         }
2035
2036         if (refs > 1) {
2037                 ret = enter_shared_node(root, path->nodes[*level]->start,
2038                                         refs, wc, *level);
2039                 if (ret > 0) {
2040                         err = ret;
2041                         goto out;
2042                 }
2043         }
2044
2045         while (*level >= 0) {
2046                 WARN_ON(*level < 0);
2047                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2048                 cur = path->nodes[*level];
2049
2050                 if (btrfs_header_level(cur) != *level)
2051                         WARN_ON(1);
2052
2053                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2054                         break;
2055                 if (*level == 0) {
2056                         ret = process_one_leaf(root, cur, wc);
2057                         if (ret < 0)
2058                                 err = ret;
2059                         break;
2060                 }
2061                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2062                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2063                 blocksize = root->nodesize;
2064
2065                 if (bytenr == nrefs->bytenr[*level - 1]) {
2066                         refs = nrefs->refs[*level - 1];
2067                 } else {
2068                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2069                                         *level - 1, 1, &refs, NULL);
2070                         if (ret < 0) {
2071                                 refs = 0;
2072                         } else {
2073                                 nrefs->bytenr[*level - 1] = bytenr;
2074                                 nrefs->refs[*level - 1] = refs;
2075                         }
2076                 }
2077
2078                 if (refs > 1) {
2079                         ret = enter_shared_node(root, bytenr, refs,
2080                                                 wc, *level - 1);
2081                         if (ret > 0) {
2082                                 path->slots[*level]++;
2083                                 continue;
2084                         }
2085                 }
2086
2087                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2088                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2089                         free_extent_buffer(next);
2090                         reada_walk_down(root, cur, path->slots[*level]);
2091                         next = read_tree_block(root, bytenr, blocksize,
2092                                                ptr_gen);
2093                         if (!extent_buffer_uptodate(next)) {
2094                                 struct btrfs_key node_key;
2095
2096                                 btrfs_node_key_to_cpu(path->nodes[*level],
2097                                                       &node_key,
2098                                                       path->slots[*level]);
2099                                 btrfs_add_corrupt_extent_record(root->fs_info,
2100                                                 &node_key,
2101                                                 path->nodes[*level]->start,
2102                                                 root->nodesize, *level);
2103                                 err = -EIO;
2104                                 goto out;
2105                         }
2106                 }
2107
2108                 ret = check_child_node(root, cur, path->slots[*level], next);
2109                 if (ret) {
2110                         err = ret;
2111                         goto out;
2112                 }
2113
2114                 if (btrfs_is_leaf(next))
2115                         status = btrfs_check_leaf(root, NULL, next);
2116                 else
2117                         status = btrfs_check_node(root, NULL, next);
2118                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2119                         free_extent_buffer(next);
2120                         err = -EIO;
2121                         goto out;
2122                 }
2123
2124                 *level = *level - 1;
2125                 free_extent_buffer(path->nodes[*level]);
2126                 path->nodes[*level] = next;
2127                 path->slots[*level] = 0;
2128         }
2129 out:
2130         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2131         return err;
2132 }
2133
2134 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2135                         struct walk_control *wc, int *level)
2136 {
2137         int i;
2138         struct extent_buffer *leaf;
2139
2140         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2141                 leaf = path->nodes[i];
2142                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2143                         path->slots[i]++;
2144                         *level = i;
2145                         return 0;
2146                 } else {
2147                         free_extent_buffer(path->nodes[*level]);
2148                         path->nodes[*level] = NULL;
2149                         BUG_ON(*level > wc->active_node);
2150                         if (*level == wc->active_node)
2151                                 leave_shared_node(root, wc, *level);
2152                         *level = i + 1;
2153                 }
2154         }
2155         return 1;
2156 }
2157
2158 static int check_root_dir(struct inode_record *rec)
2159 {
2160         struct inode_backref *backref;
2161         int ret = -1;
2162
2163         if (!rec->found_inode_item || rec->errors)
2164                 goto out;
2165         if (rec->nlink != 1 || rec->found_link != 0)
2166                 goto out;
2167         if (list_empty(&rec->backrefs))
2168                 goto out;
2169         backref = to_inode_backref(rec->backrefs.next);
2170         if (!backref->found_inode_ref)
2171                 goto out;
2172         if (backref->index != 0 || backref->namelen != 2 ||
2173             memcmp(backref->name, "..", 2))
2174                 goto out;
2175         if (backref->found_dir_index || backref->found_dir_item)
2176                 goto out;
2177         ret = 0;
2178 out:
2179         return ret;
2180 }
2181
2182 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2183                               struct btrfs_root *root, struct btrfs_path *path,
2184                               struct inode_record *rec)
2185 {
2186         struct btrfs_inode_item *ei;
2187         struct btrfs_key key;
2188         int ret;
2189
2190         key.objectid = rec->ino;
2191         key.type = BTRFS_INODE_ITEM_KEY;
2192         key.offset = (u64)-1;
2193
2194         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2195         if (ret < 0)
2196                 goto out;
2197         if (ret) {
2198                 if (!path->slots[0]) {
2199                         ret = -ENOENT;
2200                         goto out;
2201                 }
2202                 path->slots[0]--;
2203                 ret = 0;
2204         }
2205         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2206         if (key.objectid != rec->ino) {
2207                 ret = -ENOENT;
2208                 goto out;
2209         }
2210
2211         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2212                             struct btrfs_inode_item);
2213         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2214         btrfs_mark_buffer_dirty(path->nodes[0]);
2215         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2216         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2217                root->root_key.objectid);
2218 out:
2219         btrfs_release_path(path);
2220         return ret;
2221 }
2222
2223 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2224                                     struct btrfs_root *root,
2225                                     struct btrfs_path *path,
2226                                     struct inode_record *rec)
2227 {
2228         int ret;
2229
2230         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2231         btrfs_release_path(path);
2232         if (!ret)
2233                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2234         return ret;
2235 }
2236
2237 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2238                                struct btrfs_root *root,
2239                                struct btrfs_path *path,
2240                                struct inode_record *rec)
2241 {
2242         struct btrfs_inode_item *ei;
2243         struct btrfs_key key;
2244         int ret = 0;
2245
2246         key.objectid = rec->ino;
2247         key.type = BTRFS_INODE_ITEM_KEY;
2248         key.offset = 0;
2249
2250         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2251         if (ret) {
2252                 if (ret > 0)
2253                         ret = -ENOENT;
2254                 goto out;
2255         }
2256
2257         /* Since ret == 0, no need to check anything */
2258         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2259                             struct btrfs_inode_item);
2260         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2261         btrfs_mark_buffer_dirty(path->nodes[0]);
2262         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2263         printf("reset nbytes for ino %llu root %llu\n",
2264                rec->ino, root->root_key.objectid);
2265 out:
2266         btrfs_release_path(path);
2267         return ret;
2268 }
2269
2270 static int add_missing_dir_index(struct btrfs_root *root,
2271                                  struct cache_tree *inode_cache,
2272                                  struct inode_record *rec,
2273                                  struct inode_backref *backref)
2274 {
2275         struct btrfs_path *path;
2276         struct btrfs_trans_handle *trans;
2277         struct btrfs_dir_item *dir_item;
2278         struct extent_buffer *leaf;
2279         struct btrfs_key key;
2280         struct btrfs_disk_key disk_key;
2281         struct inode_record *dir_rec;
2282         unsigned long name_ptr;
2283         u32 data_size = sizeof(*dir_item) + backref->namelen;
2284         int ret;
2285
2286         path = btrfs_alloc_path();
2287         if (!path)
2288                 return -ENOMEM;
2289
2290         trans = btrfs_start_transaction(root, 1);
2291         if (IS_ERR(trans)) {
2292                 btrfs_free_path(path);
2293                 return PTR_ERR(trans);
2294         }
2295
2296         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2297                 (unsigned long long)rec->ino);
2298         key.objectid = backref->dir;
2299         key.type = BTRFS_DIR_INDEX_KEY;
2300         key.offset = backref->index;
2301
2302         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2303         BUG_ON(ret);
2304
2305         leaf = path->nodes[0];
2306         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2307
2308         disk_key.objectid = cpu_to_le64(rec->ino);
2309         disk_key.type = BTRFS_INODE_ITEM_KEY;
2310         disk_key.offset = 0;
2311
2312         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2313         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2314         btrfs_set_dir_data_len(leaf, dir_item, 0);
2315         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2316         name_ptr = (unsigned long)(dir_item + 1);
2317         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2318         btrfs_mark_buffer_dirty(leaf);
2319         btrfs_free_path(path);
2320         btrfs_commit_transaction(trans, root);
2321
2322         backref->found_dir_index = 1;
2323         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2324         BUG_ON(IS_ERR(dir_rec));
2325         if (!dir_rec)
2326                 return 0;
2327         dir_rec->found_size += backref->namelen;
2328         if (dir_rec->found_size == dir_rec->isize &&
2329             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2330                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2331         if (dir_rec->found_size != dir_rec->isize)
2332                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2333
2334         return 0;
2335 }
2336
2337 static int delete_dir_index(struct btrfs_root *root,
2338                             struct cache_tree *inode_cache,
2339                             struct inode_record *rec,
2340                             struct inode_backref *backref)
2341 {
2342         struct btrfs_trans_handle *trans;
2343         struct btrfs_dir_item *di;
2344         struct btrfs_path *path;
2345         int ret = 0;
2346
2347         path = btrfs_alloc_path();
2348         if (!path)
2349                 return -ENOMEM;
2350
2351         trans = btrfs_start_transaction(root, 1);
2352         if (IS_ERR(trans)) {
2353                 btrfs_free_path(path);
2354                 return PTR_ERR(trans);
2355         }
2356
2357
2358         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2359                 (unsigned long long)backref->dir,
2360                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2361                 (unsigned long long)root->objectid);
2362
2363         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2364                                     backref->name, backref->namelen,
2365                                     backref->index, -1);
2366         if (IS_ERR(di)) {
2367                 ret = PTR_ERR(di);
2368                 btrfs_free_path(path);
2369                 btrfs_commit_transaction(trans, root);
2370                 if (ret == -ENOENT)
2371                         return 0;
2372                 return ret;
2373         }
2374
2375         if (!di)
2376                 ret = btrfs_del_item(trans, root, path);
2377         else
2378                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2379         BUG_ON(ret);
2380         btrfs_free_path(path);
2381         btrfs_commit_transaction(trans, root);
2382         return ret;
2383 }
2384
2385 static int create_inode_item(struct btrfs_root *root,
2386                              struct inode_record *rec,
2387                              struct inode_backref *backref, int root_dir)
2388 {
2389         struct btrfs_trans_handle *trans;
2390         struct btrfs_inode_item inode_item;
2391         time_t now = time(NULL);
2392         int ret;
2393
2394         trans = btrfs_start_transaction(root, 1);
2395         if (IS_ERR(trans)) {
2396                 ret = PTR_ERR(trans);
2397                 return ret;
2398         }
2399
2400         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2401                 "be incomplete, please check permissions and content after "
2402                 "the fsck completes.\n", (unsigned long long)root->objectid,
2403                 (unsigned long long)rec->ino);
2404
2405         memset(&inode_item, 0, sizeof(inode_item));
2406         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2407         if (root_dir)
2408                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2409         else
2410                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2411         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2412         if (rec->found_dir_item) {
2413                 if (rec->found_file_extent)
2414                         fprintf(stderr, "root %llu inode %llu has both a dir "
2415                                 "item and extents, unsure if it is a dir or a "
2416                                 "regular file so setting it as a directory\n",
2417                                 (unsigned long long)root->objectid,
2418                                 (unsigned long long)rec->ino);
2419                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2420                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2421         } else if (!rec->found_dir_item) {
2422                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2423                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2424         }
2425         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2426         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2427         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2428         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2429         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2430         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2431         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2432         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2433
2434         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2435         BUG_ON(ret);
2436         btrfs_commit_transaction(trans, root);
2437         return 0;
2438 }
2439
2440 static int repair_inode_backrefs(struct btrfs_root *root,
2441                                  struct inode_record *rec,
2442                                  struct cache_tree *inode_cache,
2443                                  int delete)
2444 {
2445         struct inode_backref *tmp, *backref;
2446         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2447         int ret = 0;
2448         int repaired = 0;
2449
2450         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2451                 if (!delete && rec->ino == root_dirid) {
2452                         if (!rec->found_inode_item) {
2453                                 ret = create_inode_item(root, rec, backref, 1);
2454                                 if (ret)
2455                                         break;
2456                                 repaired++;
2457                         }
2458                 }
2459
2460                 /* Index 0 for root dir's are special, don't mess with it */
2461                 if (rec->ino == root_dirid && backref->index == 0)
2462                         continue;
2463
2464                 if (delete &&
2465                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2466                      (backref->found_dir_index && backref->found_inode_ref &&
2467                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2468                         ret = delete_dir_index(root, inode_cache, rec, backref);
2469                         if (ret)
2470                                 break;
2471                         repaired++;
2472                         list_del(&backref->list);
2473                         free(backref);
2474                 }
2475
2476                 if (!delete && !backref->found_dir_index &&
2477                     backref->found_dir_item && backref->found_inode_ref) {
2478                         ret = add_missing_dir_index(root, inode_cache, rec,
2479                                                     backref);
2480                         if (ret)
2481                                 break;
2482                         repaired++;
2483                         if (backref->found_dir_item &&
2484                             backref->found_dir_index &&
2485                             backref->found_dir_index) {
2486                                 if (!backref->errors &&
2487                                     backref->found_inode_ref) {
2488                                         list_del(&backref->list);
2489                                         free(backref);
2490                                 }
2491                         }
2492                 }
2493
2494                 if (!delete && (!backref->found_dir_index &&
2495                                 !backref->found_dir_item &&
2496                                 backref->found_inode_ref)) {
2497                         struct btrfs_trans_handle *trans;
2498                         struct btrfs_key location;
2499
2500                         ret = check_dir_conflict(root, backref->name,
2501                                                  backref->namelen,
2502                                                  backref->dir,
2503                                                  backref->index);
2504                         if (ret) {
2505                                 /*
2506                                  * let nlink fixing routine to handle it,
2507                                  * which can do it better.
2508                                  */
2509                                 ret = 0;
2510                                 break;
2511                         }
2512                         location.objectid = rec->ino;
2513                         location.type = BTRFS_INODE_ITEM_KEY;
2514                         location.offset = 0;
2515
2516                         trans = btrfs_start_transaction(root, 1);
2517                         if (IS_ERR(trans)) {
2518                                 ret = PTR_ERR(trans);
2519                                 break;
2520                         }
2521                         fprintf(stderr, "adding missing dir index/item pair "
2522                                 "for inode %llu\n",
2523                                 (unsigned long long)rec->ino);
2524                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2525                                                     backref->namelen,
2526                                                     backref->dir, &location,
2527                                                     imode_to_type(rec->imode),
2528                                                     backref->index);
2529                         BUG_ON(ret);
2530                         btrfs_commit_transaction(trans, root);
2531                         repaired++;
2532                 }
2533
2534                 if (!delete && (backref->found_inode_ref &&
2535                                 backref->found_dir_index &&
2536                                 backref->found_dir_item &&
2537                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2538                                 !rec->found_inode_item)) {
2539                         ret = create_inode_item(root, rec, backref, 0);
2540                         if (ret)
2541                                 break;
2542                         repaired++;
2543                 }
2544
2545         }
2546         return ret ? ret : repaired;
2547 }
2548
2549 /*
2550  * To determine the file type for nlink/inode_item repair
2551  *
2552  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2553  * Return -ENOENT if file type is not found.
2554  */
2555 static int find_file_type(struct inode_record *rec, u8 *type)
2556 {
2557         struct inode_backref *backref;
2558
2559         /* For inode item recovered case */
2560         if (rec->found_inode_item) {
2561                 *type = imode_to_type(rec->imode);
2562                 return 0;
2563         }
2564
2565         list_for_each_entry(backref, &rec->backrefs, list) {
2566                 if (backref->found_dir_index || backref->found_dir_item) {
2567                         *type = backref->filetype;
2568                         return 0;
2569                 }
2570         }
2571         return -ENOENT;
2572 }
2573
2574 /*
2575  * To determine the file name for nlink repair
2576  *
2577  * Return 0 if file name is found, set name and namelen.
2578  * Return -ENOENT if file name is not found.
2579  */
2580 static int find_file_name(struct inode_record *rec,
2581                           char *name, int *namelen)
2582 {
2583         struct inode_backref *backref;
2584
2585         list_for_each_entry(backref, &rec->backrefs, list) {
2586                 if (backref->found_dir_index || backref->found_dir_item ||
2587                     backref->found_inode_ref) {
2588                         memcpy(name, backref->name, backref->namelen);
2589                         *namelen = backref->namelen;
2590                         return 0;
2591                 }
2592         }
2593         return -ENOENT;
2594 }
2595
2596 /* Reset the nlink of the inode to the correct one */
2597 static int reset_nlink(struct btrfs_trans_handle *trans,
2598                        struct btrfs_root *root,
2599                        struct btrfs_path *path,
2600                        struct inode_record *rec)
2601 {
2602         struct inode_backref *backref;
2603         struct inode_backref *tmp;
2604         struct btrfs_key key;
2605         struct btrfs_inode_item *inode_item;
2606         int ret = 0;
2607
2608         /* We don't believe this either, reset it and iterate backref */
2609         rec->found_link = 0;
2610
2611         /* Remove all backref including the valid ones */
2612         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2613                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2614                                    backref->index, backref->name,
2615                                    backref->namelen, 0);
2616                 if (ret < 0)
2617                         goto out;
2618
2619                 /* remove invalid backref, so it won't be added back */
2620                 if (!(backref->found_dir_index &&
2621                       backref->found_dir_item &&
2622                       backref->found_inode_ref)) {
2623                         list_del(&backref->list);
2624                         free(backref);
2625                 } else {
2626                         rec->found_link++;
2627                 }
2628         }
2629
2630         /* Set nlink to 0 */
2631         key.objectid = rec->ino;
2632         key.type = BTRFS_INODE_ITEM_KEY;
2633         key.offset = 0;
2634         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2635         if (ret < 0)
2636                 goto out;
2637         if (ret > 0) {
2638                 ret = -ENOENT;
2639                 goto out;
2640         }
2641         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2642                                     struct btrfs_inode_item);
2643         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2644         btrfs_mark_buffer_dirty(path->nodes[0]);
2645         btrfs_release_path(path);
2646
2647         /*
2648          * Add back valid inode_ref/dir_item/dir_index,
2649          * add_link() will handle the nlink inc, so new nlink must be correct
2650          */
2651         list_for_each_entry(backref, &rec->backrefs, list) {
2652                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2653                                      backref->name, backref->namelen,
2654                                      backref->filetype, &backref->index, 1);
2655                 if (ret < 0)
2656                         goto out;
2657         }
2658 out:
2659         btrfs_release_path(path);
2660         return ret;
2661 }
2662
2663 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2664                                struct btrfs_root *root,
2665                                struct btrfs_path *path,
2666                                struct inode_record *rec)
2667 {
2668         char *dir_name = "lost+found";
2669         char namebuf[BTRFS_NAME_LEN] = {0};
2670         u64 lost_found_ino;
2671         u32 mode = 0700;
2672         u8 type = 0;
2673         int namelen = 0;
2674         int name_recovered = 0;
2675         int type_recovered = 0;
2676         int ret = 0;
2677
2678         /*
2679          * Get file name and type first before these invalid inode ref
2680          * are deleted by remove_all_invalid_backref()
2681          */
2682         name_recovered = !find_file_name(rec, namebuf, &namelen);
2683         type_recovered = !find_file_type(rec, &type);
2684
2685         if (!name_recovered) {
2686                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2687                        rec->ino, rec->ino);
2688                 namelen = count_digits(rec->ino);
2689                 sprintf(namebuf, "%llu", rec->ino);
2690                 name_recovered = 1;
2691         }
2692         if (!type_recovered) {
2693                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2694                        rec->ino);
2695                 type = BTRFS_FT_REG_FILE;
2696                 type_recovered = 1;
2697         }
2698
2699         ret = reset_nlink(trans, root, path, rec);
2700         if (ret < 0) {
2701                 fprintf(stderr,
2702                         "Failed to reset nlink for inode %llu: %s\n",
2703                         rec->ino, strerror(-ret));
2704                 goto out;
2705         }
2706
2707         if (rec->found_link == 0) {
2708                 lost_found_ino = root->highest_inode;
2709                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2710                         ret = -EOVERFLOW;
2711                         goto out;
2712                 }
2713                 lost_found_ino++;
2714                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2715                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2716                                   mode);
2717                 if (ret < 0) {
2718                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2719                                 dir_name, strerror(-ret));
2720                         goto out;
2721                 }
2722                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2723                                      namebuf, namelen, type, NULL, 1);
2724                 /*
2725                  * Add ".INO" suffix several times to handle case where
2726                  * "FILENAME.INO" is already taken by another file.
2727                  */
2728                 while (ret == -EEXIST) {
2729                         /*
2730                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2731                          */
2732                         if (namelen + count_digits(rec->ino) + 1 >
2733                             BTRFS_NAME_LEN) {
2734                                 ret = -EFBIG;
2735                                 goto out;
2736                         }
2737                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2738                                  ".%llu", rec->ino);
2739                         namelen += count_digits(rec->ino) + 1;
2740                         ret = btrfs_add_link(trans, root, rec->ino,
2741                                              lost_found_ino, namebuf,
2742                                              namelen, type, NULL, 1);
2743                 }
2744                 if (ret < 0) {
2745                         fprintf(stderr,
2746                                 "Failed to link the inode %llu to %s dir: %s\n",
2747                                 rec->ino, dir_name, strerror(-ret));
2748                         goto out;
2749                 }
2750                 /*
2751                  * Just increase the found_link, don't actually add the
2752                  * backref. This will make things easier and this inode
2753                  * record will be freed after the repair is done.
2754                  * So fsck will not report problem about this inode.
2755                  */
2756                 rec->found_link++;
2757                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2758                        namelen, namebuf, dir_name);
2759         }
2760         printf("Fixed the nlink of inode %llu\n", rec->ino);
2761 out:
2762         /*
2763          * Clear the flag anyway, or we will loop forever for the same inode
2764          * as it will not be removed from the bad inode list and the dead loop
2765          * happens.
2766          */
2767         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2768         btrfs_release_path(path);
2769         return ret;
2770 }
2771
2772 /*
2773  * Check if there is any normal(reg or prealloc) file extent for given
2774  * ino.
2775  * This is used to determine the file type when neither its dir_index/item or
2776  * inode_item exists.
2777  *
2778  * This will *NOT* report error, if any error happens, just consider it does
2779  * not have any normal file extent.
2780  */
2781 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2782 {
2783         struct btrfs_path *path;
2784         struct btrfs_key key;
2785         struct btrfs_key found_key;
2786         struct btrfs_file_extent_item *fi;
2787         u8 type;
2788         int ret = 0;
2789
2790         path = btrfs_alloc_path();
2791         if (!path)
2792                 goto out;
2793         key.objectid = ino;
2794         key.type = BTRFS_EXTENT_DATA_KEY;
2795         key.offset = 0;
2796
2797         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2798         if (ret < 0) {
2799                 ret = 0;
2800                 goto out;
2801         }
2802         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2803                 ret = btrfs_next_leaf(root, path);
2804                 if (ret) {
2805                         ret = 0;
2806                         goto out;
2807                 }
2808         }
2809         while (1) {
2810                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2811                                       path->slots[0]);
2812                 if (found_key.objectid != ino ||
2813                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2814                         break;
2815                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2816                                     struct btrfs_file_extent_item);
2817                 type = btrfs_file_extent_type(path->nodes[0], fi);
2818                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2819                         ret = 1;
2820                         goto out;
2821                 }
2822         }
2823 out:
2824         btrfs_free_path(path);
2825         return ret;
2826 }
2827
2828 static u32 btrfs_type_to_imode(u8 type)
2829 {
2830         static u32 imode_by_btrfs_type[] = {
2831                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2832                 [BTRFS_FT_DIR]          = S_IFDIR,
2833                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2834                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2835                 [BTRFS_FT_FIFO]         = S_IFIFO,
2836                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2837                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2838         };
2839
2840         return imode_by_btrfs_type[(type)];
2841 }
2842
2843 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2844                                 struct btrfs_root *root,
2845                                 struct btrfs_path *path,
2846                                 struct inode_record *rec)
2847 {
2848         u8 filetype;
2849         u32 mode = 0700;
2850         int type_recovered = 0;
2851         int ret = 0;
2852
2853         printf("Trying to rebuild inode:%llu\n", rec->ino);
2854
2855         type_recovered = !find_file_type(rec, &filetype);
2856
2857         /*
2858          * Try to determine inode type if type not found.
2859          *
2860          * For found regular file extent, it must be FILE.
2861          * For found dir_item/index, it must be DIR.
2862          *
2863          * For undetermined one, use FILE as fallback.
2864          *
2865          * TODO:
2866          * 1. If found backref(inode_index/item is already handled) to it,
2867          *    it must be DIR.
2868          *    Need new inode-inode ref structure to allow search for that.
2869          */
2870         if (!type_recovered) {
2871                 if (rec->found_file_extent &&
2872                     find_normal_file_extent(root, rec->ino)) {
2873                         type_recovered = 1;
2874                         filetype = BTRFS_FT_REG_FILE;
2875                 } else if (rec->found_dir_item) {
2876                         type_recovered = 1;
2877                         filetype = BTRFS_FT_DIR;
2878                 } else if (!list_empty(&rec->orphan_extents)) {
2879                         type_recovered = 1;
2880                         filetype = BTRFS_FT_REG_FILE;
2881                 } else{
2882                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2883                                rec->ino);
2884                         type_recovered = 1;
2885                         filetype = BTRFS_FT_REG_FILE;
2886                 }
2887         }
2888
2889         ret = btrfs_new_inode(trans, root, rec->ino,
2890                               mode | btrfs_type_to_imode(filetype));
2891         if (ret < 0)
2892                 goto out;
2893
2894         /*
2895          * Here inode rebuild is done, we only rebuild the inode item,
2896          * don't repair the nlink(like move to lost+found).
2897          * That is the job of nlink repair.
2898          *
2899          * We just fill the record and return
2900          */
2901         rec->found_dir_item = 1;
2902         rec->imode = mode | btrfs_type_to_imode(filetype);
2903         rec->nlink = 0;
2904         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2905         /* Ensure the inode_nlinks repair function will be called */
2906         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2907 out:
2908         return ret;
2909 }
2910
2911 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2912                                       struct btrfs_root *root,
2913                                       struct btrfs_path *path,
2914                                       struct inode_record *rec)
2915 {
2916         struct orphan_data_extent *orphan;
2917         struct orphan_data_extent *tmp;
2918         int ret = 0;
2919
2920         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2921                 /*
2922                  * Check for conflicting file extents
2923                  *
2924                  * Here we don't know whether the extents is compressed or not,
2925                  * so we can only assume it not compressed nor data offset,
2926                  * and use its disk_len as extent length.
2927                  */
2928                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2929                                        orphan->offset, orphan->disk_len, 0);
2930                 btrfs_release_path(path);
2931                 if (ret < 0)
2932                         goto out;
2933                 if (!ret) {
2934                         fprintf(stderr,
2935                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2936                                 orphan->disk_bytenr, orphan->disk_len);
2937                         ret = btrfs_free_extent(trans,
2938                                         root->fs_info->extent_root,
2939                                         orphan->disk_bytenr, orphan->disk_len,
2940                                         0, root->objectid, orphan->objectid,
2941                                         orphan->offset);
2942                         if (ret < 0)
2943                                 goto out;
2944                 }
2945                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2946                                 orphan->offset, orphan->disk_bytenr,
2947                                 orphan->disk_len, orphan->disk_len);
2948                 if (ret < 0)
2949                         goto out;
2950
2951                 /* Update file size info */
2952                 rec->found_size += orphan->disk_len;
2953                 if (rec->found_size == rec->nbytes)
2954                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2955
2956                 /* Update the file extent hole info too */
2957                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2958                                            orphan->disk_len);
2959                 if (ret < 0)
2960                         goto out;
2961                 if (RB_EMPTY_ROOT(&rec->holes))
2962                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2963
2964                 list_del(&orphan->list);
2965                 free(orphan);
2966         }
2967         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2968 out:
2969         return ret;
2970 }
2971
2972 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2973                                         struct btrfs_root *root,
2974                                         struct btrfs_path *path,
2975                                         struct inode_record *rec)
2976 {
2977         struct rb_node *node;
2978         struct file_extent_hole *hole;
2979         int found = 0;
2980         int ret = 0;
2981
2982         node = rb_first(&rec->holes);
2983
2984         while (node) {
2985                 found = 1;
2986                 hole = rb_entry(node, struct file_extent_hole, node);
2987                 ret = btrfs_punch_hole(trans, root, rec->ino,
2988                                        hole->start, hole->len);
2989                 if (ret < 0)
2990                         goto out;
2991                 ret = del_file_extent_hole(&rec->holes, hole->start,
2992                                            hole->len);
2993                 if (ret < 0)
2994                         goto out;
2995                 if (RB_EMPTY_ROOT(&rec->holes))
2996                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2997                 node = rb_first(&rec->holes);
2998         }
2999         /* special case for a file losing all its file extent */
3000         if (!found) {
3001                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3002                                        round_up(rec->isize, root->sectorsize));
3003                 if (ret < 0)
3004                         goto out;
3005         }
3006         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3007                rec->ino, root->objectid);
3008 out:
3009         return ret;
3010 }
3011
3012 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3013 {
3014         struct btrfs_trans_handle *trans;
3015         struct btrfs_path *path;
3016         int ret = 0;
3017
3018         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3019                              I_ERR_NO_ORPHAN_ITEM |
3020                              I_ERR_LINK_COUNT_WRONG |
3021                              I_ERR_NO_INODE_ITEM |
3022                              I_ERR_FILE_EXTENT_ORPHAN |
3023                              I_ERR_FILE_EXTENT_DISCOUNT|
3024                              I_ERR_FILE_NBYTES_WRONG)))
3025                 return rec->errors;
3026
3027         path = btrfs_alloc_path();
3028         if (!path)
3029                 return -ENOMEM;
3030
3031         /*
3032          * For nlink repair, it may create a dir and add link, so
3033          * 2 for parent(256)'s dir_index and dir_item
3034          * 2 for lost+found dir's inode_item and inode_ref
3035          * 1 for the new inode_ref of the file
3036          * 2 for lost+found dir's dir_index and dir_item for the file
3037          */
3038         trans = btrfs_start_transaction(root, 7);
3039         if (IS_ERR(trans)) {
3040                 btrfs_free_path(path);
3041                 return PTR_ERR(trans);
3042         }
3043
3044         if (rec->errors & I_ERR_NO_INODE_ITEM)
3045                 ret = repair_inode_no_item(trans, root, path, rec);
3046         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3047                 ret = repair_inode_orphan_extent(trans, root, path, rec);
3048         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3049                 ret = repair_inode_discount_extent(trans, root, path, rec);
3050         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3051                 ret = repair_inode_isize(trans, root, path, rec);
3052         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3053                 ret = repair_inode_orphan_item(trans, root, path, rec);
3054         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3055                 ret = repair_inode_nlinks(trans, root, path, rec);
3056         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3057                 ret = repair_inode_nbytes(trans, root, path, rec);
3058         btrfs_commit_transaction(trans, root);
3059         btrfs_free_path(path);
3060         return ret;
3061 }
3062
3063 static int check_inode_recs(struct btrfs_root *root,
3064                             struct cache_tree *inode_cache)
3065 {
3066         struct cache_extent *cache;
3067         struct ptr_node *node;
3068         struct inode_record *rec;
3069         struct inode_backref *backref;
3070         int stage = 0;
3071         int ret = 0;
3072         int err = 0;
3073         u64 error = 0;
3074         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3075
3076         if (btrfs_root_refs(&root->root_item) == 0) {
3077                 if (!cache_tree_empty(inode_cache))
3078                         fprintf(stderr, "warning line %d\n", __LINE__);
3079                 return 0;
3080         }
3081
3082         /*
3083          * We need to record the highest inode number for later 'lost+found'
3084          * dir creation.
3085          * We must select an ino not used/referred by any existing inode, or
3086          * 'lost+found' ino may be a missing ino in a corrupted leaf,
3087          * this may cause 'lost+found' dir has wrong nlinks.
3088          */
3089         cache = last_cache_extent(inode_cache);
3090         if (cache) {
3091                 node = container_of(cache, struct ptr_node, cache);
3092                 rec = node->data;
3093                 if (rec->ino > root->highest_inode)
3094                         root->highest_inode = rec->ino;
3095         }
3096
3097         /*
3098          * We need to repair backrefs first because we could change some of the
3099          * errors in the inode recs.
3100          *
3101          * We also need to go through and delete invalid backrefs first and then
3102          * add the correct ones second.  We do this because we may get EEXIST
3103          * when adding back the correct index because we hadn't yet deleted the
3104          * invalid index.
3105          *
3106          * For example, if we were missing a dir index then the directories
3107          * isize would be wrong, so if we fixed the isize to what we thought it
3108          * would be and then fixed the backref we'd still have a invalid fs, so
3109          * we need to add back the dir index and then check to see if the isize
3110          * is still wrong.
3111          */
3112         while (stage < 3) {
3113                 stage++;
3114                 if (stage == 3 && !err)
3115                         break;
3116
3117                 cache = search_cache_extent(inode_cache, 0);
3118                 while (repair && cache) {
3119                         node = container_of(cache, struct ptr_node, cache);
3120                         rec = node->data;
3121                         cache = next_cache_extent(cache);
3122
3123                         /* Need to free everything up and rescan */
3124                         if (stage == 3) {
3125                                 remove_cache_extent(inode_cache, &node->cache);
3126                                 free(node);
3127                                 free_inode_rec(rec);
3128                                 continue;
3129                         }
3130
3131                         if (list_empty(&rec->backrefs))
3132                                 continue;
3133
3134                         ret = repair_inode_backrefs(root, rec, inode_cache,
3135                                                     stage == 1);
3136                         if (ret < 0) {
3137                                 err = ret;
3138                                 stage = 2;
3139                                 break;
3140                         } if (ret > 0) {
3141                                 err = -EAGAIN;
3142                         }
3143                 }
3144         }
3145         if (err)
3146                 return err;
3147
3148         rec = get_inode_rec(inode_cache, root_dirid, 0);
3149         BUG_ON(IS_ERR(rec));
3150         if (rec) {
3151                 ret = check_root_dir(rec);
3152                 if (ret) {
3153                         fprintf(stderr, "root %llu root dir %llu error\n",
3154                                 (unsigned long long)root->root_key.objectid,
3155                                 (unsigned long long)root_dirid);
3156                         print_inode_error(root, rec);
3157                         error++;
3158                 }
3159         } else {
3160                 if (repair) {
3161                         struct btrfs_trans_handle *trans;
3162
3163                         trans = btrfs_start_transaction(root, 1);
3164                         if (IS_ERR(trans)) {
3165                                 err = PTR_ERR(trans);
3166                                 return err;
3167                         }
3168
3169                         fprintf(stderr,
3170                                 "root %llu missing its root dir, recreating\n",
3171                                 (unsigned long long)root->objectid);
3172
3173                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3174                         BUG_ON(ret);
3175
3176                         btrfs_commit_transaction(trans, root);
3177                         return -EAGAIN;
3178                 }
3179
3180                 fprintf(stderr, "root %llu root dir %llu not found\n",
3181                         (unsigned long long)root->root_key.objectid,
3182                         (unsigned long long)root_dirid);
3183         }
3184
3185         while (1) {
3186                 cache = search_cache_extent(inode_cache, 0);
3187                 if (!cache)
3188                         break;
3189                 node = container_of(cache, struct ptr_node, cache);
3190                 rec = node->data;
3191                 remove_cache_extent(inode_cache, &node->cache);
3192                 free(node);
3193                 if (rec->ino == root_dirid ||
3194                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3195                         free_inode_rec(rec);
3196                         continue;
3197                 }
3198
3199                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3200                         ret = check_orphan_item(root, rec->ino);
3201                         if (ret == 0)
3202                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3203                         if (can_free_inode_rec(rec)) {
3204                                 free_inode_rec(rec);
3205                                 continue;
3206                         }
3207                 }
3208
3209                 if (!rec->found_inode_item)
3210                         rec->errors |= I_ERR_NO_INODE_ITEM;
3211                 if (rec->found_link != rec->nlink)
3212                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3213                 if (repair) {
3214                         ret = try_repair_inode(root, rec);
3215                         if (ret == 0 && can_free_inode_rec(rec)) {
3216                                 free_inode_rec(rec);
3217                                 continue;
3218                         }
3219                         ret = 0;
3220                 }
3221
3222                 if (!(repair && ret == 0))
3223                         error++;
3224                 print_inode_error(root, rec);
3225                 list_for_each_entry(backref, &rec->backrefs, list) {
3226                         if (!backref->found_dir_item)
3227                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3228                         if (!backref->found_dir_index)
3229                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3230                         if (!backref->found_inode_ref)
3231                                 backref->errors |= REF_ERR_NO_INODE_REF;
3232                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3233                                 " namelen %u name %s filetype %d errors %x",
3234                                 (unsigned long long)backref->dir,
3235                                 (unsigned long long)backref->index,
3236                                 backref->namelen, backref->name,
3237                                 backref->filetype, backref->errors);
3238                         print_ref_error(backref->errors);
3239                 }
3240                 free_inode_rec(rec);
3241         }
3242         return (error > 0) ? -1 : 0;
3243 }
3244
3245 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3246                                         u64 objectid)
3247 {
3248         struct cache_extent *cache;
3249         struct root_record *rec = NULL;
3250         int ret;
3251
3252         cache = lookup_cache_extent(root_cache, objectid, 1);
3253         if (cache) {
3254                 rec = container_of(cache, struct root_record, cache);
3255         } else {
3256                 rec = calloc(1, sizeof(*rec));
3257                 if (!rec)
3258                         return ERR_PTR(-ENOMEM);
3259                 rec->objectid = objectid;
3260                 INIT_LIST_HEAD(&rec->backrefs);
3261                 rec->cache.start = objectid;
3262                 rec->cache.size = 1;
3263
3264                 ret = insert_cache_extent(root_cache, &rec->cache);
3265                 if (ret)
3266                         return ERR_PTR(-EEXIST);
3267         }
3268         return rec;
3269 }
3270
3271 static struct root_backref *get_root_backref(struct root_record *rec,
3272                                              u64 ref_root, u64 dir, u64 index,
3273                                              const char *name, int namelen)
3274 {
3275         struct root_backref *backref;
3276
3277         list_for_each_entry(backref, &rec->backrefs, list) {
3278                 if (backref->ref_root != ref_root || backref->dir != dir ||
3279                     backref->namelen != namelen)
3280                         continue;
3281                 if (memcmp(name, backref->name, namelen))
3282                         continue;
3283                 return backref;
3284         }
3285
3286         backref = calloc(1, sizeof(*backref) + namelen + 1);
3287         if (!backref)
3288                 return NULL;
3289         backref->ref_root = ref_root;
3290         backref->dir = dir;
3291         backref->index = index;
3292         backref->namelen = namelen;
3293         memcpy(backref->name, name, namelen);
3294         backref->name[namelen] = '\0';
3295         list_add_tail(&backref->list, &rec->backrefs);
3296         return backref;
3297 }
3298
3299 static void free_root_record(struct cache_extent *cache)
3300 {
3301         struct root_record *rec;
3302         struct root_backref *backref;
3303
3304         rec = container_of(cache, struct root_record, cache);
3305         while (!list_empty(&rec->backrefs)) {
3306                 backref = to_root_backref(rec->backrefs.next);
3307                 list_del(&backref->list);
3308                 free(backref);
3309         }
3310
3311         kfree(rec);
3312 }
3313
3314 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3315
3316 static int add_root_backref(struct cache_tree *root_cache,
3317                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3318                             const char *name, int namelen,
3319                             int item_type, int errors)
3320 {
3321         struct root_record *rec;
3322         struct root_backref *backref;
3323
3324         rec = get_root_rec(root_cache, root_id);
3325         BUG_ON(IS_ERR(rec));
3326         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3327         BUG_ON(!backref);
3328
3329         backref->errors |= errors;
3330
3331         if (item_type != BTRFS_DIR_ITEM_KEY) {
3332                 if (backref->found_dir_index || backref->found_back_ref ||
3333                     backref->found_forward_ref) {
3334                         if (backref->index != index)
3335                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3336                 } else {
3337                         backref->index = index;
3338                 }
3339         }
3340
3341         if (item_type == BTRFS_DIR_ITEM_KEY) {
3342                 if (backref->found_forward_ref)
3343                         rec->found_ref++;
3344                 backref->found_dir_item = 1;
3345         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3346                 backref->found_dir_index = 1;
3347         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3348                 if (backref->found_forward_ref)
3349                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3350                 else if (backref->found_dir_item)
3351                         rec->found_ref++;
3352                 backref->found_forward_ref = 1;
3353         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3354                 if (backref->found_back_ref)
3355                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3356                 backref->found_back_ref = 1;
3357         } else {
3358                 BUG_ON(1);
3359         }
3360
3361         if (backref->found_forward_ref && backref->found_dir_item)
3362                 backref->reachable = 1;
3363         return 0;
3364 }
3365
3366 static int merge_root_recs(struct btrfs_root *root,
3367                            struct cache_tree *src_cache,
3368                            struct cache_tree *dst_cache)
3369 {
3370         struct cache_extent *cache;
3371         struct ptr_node *node;
3372         struct inode_record *rec;
3373         struct inode_backref *backref;
3374         int ret = 0;
3375
3376         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3377                 free_inode_recs_tree(src_cache);
3378                 return 0;
3379         }
3380
3381         while (1) {
3382                 cache = search_cache_extent(src_cache, 0);
3383                 if (!cache)
3384                         break;
3385                 node = container_of(cache, struct ptr_node, cache);
3386                 rec = node->data;
3387                 remove_cache_extent(src_cache, &node->cache);
3388                 free(node);
3389
3390                 ret = is_child_root(root, root->objectid, rec->ino);
3391                 if (ret < 0)
3392                         break;
3393                 else if (ret == 0)
3394                         goto skip;
3395
3396                 list_for_each_entry(backref, &rec->backrefs, list) {
3397                         BUG_ON(backref->found_inode_ref);
3398                         if (backref->found_dir_item)
3399                                 add_root_backref(dst_cache, rec->ino,
3400                                         root->root_key.objectid, backref->dir,
3401                                         backref->index, backref->name,
3402                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3403                                         backref->errors);
3404                         if (backref->found_dir_index)
3405                                 add_root_backref(dst_cache, rec->ino,
3406                                         root->root_key.objectid, backref->dir,
3407                                         backref->index, backref->name,
3408                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3409                                         backref->errors);
3410                 }
3411 skip:
3412                 free_inode_rec(rec);
3413         }
3414         if (ret < 0)
3415                 return ret;
3416         return 0;
3417 }
3418
3419 static int check_root_refs(struct btrfs_root *root,
3420                            struct cache_tree *root_cache)
3421 {
3422         struct root_record *rec;
3423         struct root_record *ref_root;
3424         struct root_backref *backref;
3425         struct cache_extent *cache;
3426         int loop = 1;
3427         int ret;
3428         int error;
3429         int errors = 0;
3430
3431         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3432         BUG_ON(IS_ERR(rec));
3433         rec->found_ref = 1;
3434
3435         /* fixme: this can not detect circular references */
3436         while (loop) {
3437                 loop = 0;
3438                 cache = search_cache_extent(root_cache, 0);
3439                 while (1) {
3440                         if (!cache)
3441                                 break;
3442                         rec = container_of(cache, struct root_record, cache);
3443                         cache = next_cache_extent(cache);
3444
3445                         if (rec->found_ref == 0)
3446                                 continue;
3447
3448                         list_for_each_entry(backref, &rec->backrefs, list) {
3449                                 if (!backref->reachable)
3450                                         continue;
3451
3452                                 ref_root = get_root_rec(root_cache,
3453                                                         backref->ref_root);
3454                                 BUG_ON(IS_ERR(ref_root));
3455                                 if (ref_root->found_ref > 0)
3456                                         continue;
3457
3458                                 backref->reachable = 0;
3459                                 rec->found_ref--;
3460                                 if (rec->found_ref == 0)
3461                                         loop = 1;
3462                         }
3463                 }
3464         }
3465
3466         cache = search_cache_extent(root_cache, 0);
3467         while (1) {
3468                 if (!cache)
3469                         break;
3470                 rec = container_of(cache, struct root_record, cache);
3471                 cache = next_cache_extent(cache);
3472
3473                 if (rec->found_ref == 0 &&
3474                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3475                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3476                         ret = check_orphan_item(root->fs_info->tree_root,
3477                                                 rec->objectid);
3478                         if (ret == 0)
3479                                 continue;
3480
3481                         /*
3482                          * If we don't have a root item then we likely just have
3483                          * a dir item in a snapshot for this root but no actual
3484                          * ref key or anything so it's meaningless.
3485                          */
3486                         if (!rec->found_root_item)
3487                                 continue;
3488                         errors++;
3489                         fprintf(stderr, "fs tree %llu not referenced\n",
3490                                 (unsigned long long)rec->objectid);
3491                 }
3492
3493                 error = 0;
3494                 if (rec->found_ref > 0 && !rec->found_root_item)
3495                         error = 1;
3496                 list_for_each_entry(backref, &rec->backrefs, list) {
3497                         if (!backref->found_dir_item)
3498                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3499                         if (!backref->found_dir_index)
3500                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3501                         if (!backref->found_back_ref)
3502                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3503                         if (!backref->found_forward_ref)
3504                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3505                         if (backref->reachable && backref->errors)
3506                                 error = 1;
3507                 }
3508                 if (!error)
3509                         continue;
3510
3511                 errors++;
3512                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3513                         (unsigned long long)rec->objectid, rec->found_ref,
3514                          rec->found_root_item ? "" : "not found");
3515
3516                 list_for_each_entry(backref, &rec->backrefs, list) {
3517                         if (!backref->reachable)
3518                                 continue;
3519                         if (!backref->errors && rec->found_root_item)
3520                                 continue;
3521                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3522                                 " index %llu namelen %u name %s errors %x\n",
3523                                 (unsigned long long)backref->ref_root,
3524                                 (unsigned long long)backref->dir,
3525                                 (unsigned long long)backref->index,
3526                                 backref->namelen, backref->name,
3527                                 backref->errors);
3528                         print_ref_error(backref->errors);
3529                 }
3530         }
3531         return errors > 0 ? 1 : 0;
3532 }
3533
3534 static int process_root_ref(struct extent_buffer *eb, int slot,
3535                             struct btrfs_key *key,
3536                             struct cache_tree *root_cache)
3537 {
3538         u64 dirid;
3539         u64 index;
3540         u32 len;
3541         u32 name_len;
3542         struct btrfs_root_ref *ref;
3543         char namebuf[BTRFS_NAME_LEN];
3544         int error;
3545
3546         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3547
3548         dirid = btrfs_root_ref_dirid(eb, ref);
3549         index = btrfs_root_ref_sequence(eb, ref);
3550         name_len = btrfs_root_ref_name_len(eb, ref);
3551
3552         if (name_len <= BTRFS_NAME_LEN) {
3553                 len = name_len;
3554                 error = 0;
3555         } else {
3556                 len = BTRFS_NAME_LEN;
3557                 error = REF_ERR_NAME_TOO_LONG;
3558         }
3559         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3560
3561         if (key->type == BTRFS_ROOT_REF_KEY) {
3562                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3563                                  index, namebuf, len, key->type, error);
3564         } else {
3565                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3566                                  index, namebuf, len, key->type, error);
3567         }
3568         return 0;
3569 }
3570
3571 static void free_corrupt_block(struct cache_extent *cache)
3572 {
3573         struct btrfs_corrupt_block *corrupt;
3574
3575         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3576         free(corrupt);
3577 }
3578
3579 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3580
3581 /*
3582  * Repair the btree of the given root.
3583  *
3584  * The fix is to remove the node key in corrupt_blocks cache_tree.
3585  * and rebalance the tree.
3586  * After the fix, the btree should be writeable.
3587  */
3588 static int repair_btree(struct btrfs_root *root,
3589                         struct cache_tree *corrupt_blocks)
3590 {
3591         struct btrfs_trans_handle *trans;
3592         struct btrfs_path *path;
3593         struct btrfs_corrupt_block *corrupt;
3594         struct cache_extent *cache;
3595         struct btrfs_key key;
3596         u64 offset;
3597         int level;
3598         int ret = 0;
3599
3600         if (cache_tree_empty(corrupt_blocks))
3601                 return 0;
3602
3603         path = btrfs_alloc_path();
3604         if (!path)
3605                 return -ENOMEM;
3606
3607         trans = btrfs_start_transaction(root, 1);
3608         if (IS_ERR(trans)) {
3609                 ret = PTR_ERR(trans);
3610                 fprintf(stderr, "Error starting transaction: %s\n",
3611                         strerror(-ret));
3612                 goto out_free_path;
3613         }
3614         cache = first_cache_extent(corrupt_blocks);
3615         while (cache) {
3616                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3617                                        cache);
3618                 level = corrupt->level;
3619                 path->lowest_level = level;
3620                 key.objectid = corrupt->key.objectid;
3621                 key.type = corrupt->key.type;
3622                 key.offset = corrupt->key.offset;
3623
3624                 /*
3625                  * Here we don't want to do any tree balance, since it may
3626                  * cause a balance with corrupted brother leaf/node,
3627                  * so ins_len set to 0 here.
3628                  * Balance will be done after all corrupt node/leaf is deleted.
3629                  */
3630                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3631                 if (ret < 0)
3632                         goto out;
3633                 offset = btrfs_node_blockptr(path->nodes[level],
3634                                              path->slots[level]);
3635
3636                 /* Remove the ptr */
3637                 ret = btrfs_del_ptr(trans, root, path, level,
3638                                     path->slots[level]);
3639                 if (ret < 0)
3640                         goto out;
3641                 /*
3642                  * Remove the corresponding extent
3643                  * return value is not concerned.
3644                  */
3645                 btrfs_release_path(path);
3646                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3647                                         0, root->root_key.objectid,
3648                                         level - 1, 0);
3649                 cache = next_cache_extent(cache);
3650         }
3651
3652         /* Balance the btree using btrfs_search_slot() */
3653         cache = first_cache_extent(corrupt_blocks);
3654         while (cache) {
3655                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3656                                        cache);
3657                 memcpy(&key, &corrupt->key, sizeof(key));
3658                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3659                 if (ret < 0)
3660                         goto out;
3661                 /* return will always >0 since it won't find the item */
3662                 ret = 0;
3663                 btrfs_release_path(path);
3664                 cache = next_cache_extent(cache);
3665         }
3666 out:
3667         btrfs_commit_transaction(trans, root);
3668 out_free_path:
3669         btrfs_free_path(path);
3670         return ret;
3671 }
3672
3673 static int check_fs_root(struct btrfs_root *root,
3674                          struct cache_tree *root_cache,
3675                          struct walk_control *wc)
3676 {
3677         int ret = 0;
3678         int err = 0;
3679         int wret;
3680         int level;
3681         struct btrfs_path path;
3682         struct shared_node root_node;
3683         struct root_record *rec;
3684         struct btrfs_root_item *root_item = &root->root_item;
3685         struct cache_tree corrupt_blocks;
3686         struct orphan_data_extent *orphan;
3687         struct orphan_data_extent *tmp;
3688         enum btrfs_tree_block_status status;
3689         struct node_refs nrefs;
3690
3691         /*
3692          * Reuse the corrupt_block cache tree to record corrupted tree block
3693          *
3694          * Unlike the usage in extent tree check, here we do it in a per
3695          * fs/subvol tree base.
3696          */
3697         cache_tree_init(&corrupt_blocks);
3698         root->fs_info->corrupt_blocks = &corrupt_blocks;
3699
3700         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3701                 rec = get_root_rec(root_cache, root->root_key.objectid);
3702                 BUG_ON(IS_ERR(rec));
3703                 if (btrfs_root_refs(root_item) > 0)
3704                         rec->found_root_item = 1;
3705         }
3706
3707         btrfs_init_path(&path);
3708         memset(&root_node, 0, sizeof(root_node));
3709         cache_tree_init(&root_node.root_cache);
3710         cache_tree_init(&root_node.inode_cache);
3711         memset(&nrefs, 0, sizeof(nrefs));
3712
3713         /* Move the orphan extent record to corresponding inode_record */
3714         list_for_each_entry_safe(orphan, tmp,
3715                                  &root->orphan_data_extents, list) {
3716                 struct inode_record *inode;
3717
3718                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3719                                       1);
3720                 BUG_ON(IS_ERR(inode));
3721                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3722                 list_move(&orphan->list, &inode->orphan_extents);
3723         }
3724
3725         level = btrfs_header_level(root->node);
3726         memset(wc->nodes, 0, sizeof(wc->nodes));
3727         wc->nodes[level] = &root_node;
3728         wc->active_node = level;
3729         wc->root_level = level;
3730
3731         /* We may not have checked the root block, lets do that now */
3732         if (btrfs_is_leaf(root->node))
3733                 status = btrfs_check_leaf(root, NULL, root->node);
3734         else
3735                 status = btrfs_check_node(root, NULL, root->node);
3736         if (status != BTRFS_TREE_BLOCK_CLEAN)
3737                 return -EIO;
3738
3739         if (btrfs_root_refs(root_item) > 0 ||
3740             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3741                 path.nodes[level] = root->node;
3742                 extent_buffer_get(root->node);
3743                 path.slots[level] = 0;
3744         } else {
3745                 struct btrfs_key key;
3746                 struct btrfs_disk_key found_key;
3747
3748                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3749                 level = root_item->drop_level;
3750                 path.lowest_level = level;
3751                 if (level > btrfs_header_level(root->node) ||
3752                     level >= BTRFS_MAX_LEVEL) {
3753                         error("ignoring invalid drop level: %u", level);
3754                         goto skip_walking;
3755                 }
3756                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3757                 if (wret < 0)
3758                         goto skip_walking;
3759                 btrfs_node_key(path.nodes[level], &found_key,
3760                                 path.slots[level]);
3761                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3762                                         sizeof(found_key)));
3763         }
3764
3765         while (1) {
3766                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3767                 if (wret < 0)
3768                         ret = wret;
3769                 if (wret != 0)
3770                         break;
3771
3772                 wret = walk_up_tree(root, &path, wc, &level);
3773                 if (wret < 0)
3774                         ret = wret;
3775                 if (wret != 0)
3776                         break;
3777         }
3778 skip_walking:
3779         btrfs_release_path(&path);
3780
3781         if (!cache_tree_empty(&corrupt_blocks)) {
3782                 struct cache_extent *cache;
3783                 struct btrfs_corrupt_block *corrupt;
3784
3785                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3786                        root->root_key.objectid);
3787                 cache = first_cache_extent(&corrupt_blocks);
3788                 while (cache) {
3789                         corrupt = container_of(cache,
3790                                                struct btrfs_corrupt_block,
3791                                                cache);
3792                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3793                                cache->start, corrupt->level,
3794                                corrupt->key.objectid, corrupt->key.type,
3795                                corrupt->key.offset);
3796                         cache = next_cache_extent(cache);
3797                 }
3798                 if (repair) {
3799                         printf("Try to repair the btree for root %llu\n",
3800                                root->root_key.objectid);
3801                         ret = repair_btree(root, &corrupt_blocks);
3802                         if (ret < 0)
3803                                 fprintf(stderr, "Failed to repair btree: %s\n",
3804                                         strerror(-ret));
3805                         if (!ret)
3806                                 printf("Btree for root %llu is fixed\n",
3807                                        root->root_key.objectid);
3808                 }
3809         }
3810
3811         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3812         if (err < 0)
3813                 ret = err;
3814
3815         if (root_node.current) {
3816                 root_node.current->checked = 1;
3817                 maybe_free_inode_rec(&root_node.inode_cache,
3818                                 root_node.current);
3819         }
3820
3821         err = check_inode_recs(root, &root_node.inode_cache);
3822         if (!ret)
3823                 ret = err;
3824
3825         free_corrupt_blocks_tree(&corrupt_blocks);
3826         root->fs_info->corrupt_blocks = NULL;
3827         free_orphan_data_extents(&root->orphan_data_extents);
3828         return ret;
3829 }
3830
3831 static int fs_root_objectid(u64 objectid)
3832 {
3833         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3834             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3835                 return 1;
3836         return is_fstree(objectid);
3837 }
3838
3839 static int check_fs_roots(struct btrfs_root *root,
3840                           struct cache_tree *root_cache)
3841 {
3842         struct btrfs_path path;
3843         struct btrfs_key key;
3844         struct walk_control wc;
3845         struct extent_buffer *leaf, *tree_node;
3846         struct btrfs_root *tmp_root;
3847         struct btrfs_root *tree_root = root->fs_info->tree_root;
3848         int ret;
3849         int err = 0;
3850
3851         if (ctx.progress_enabled) {
3852                 ctx.tp = TASK_FS_ROOTS;
3853                 task_start(ctx.info);
3854         }
3855
3856         /*
3857          * Just in case we made any changes to the extent tree that weren't
3858          * reflected into the free space cache yet.
3859          */
3860         if (repair)
3861                 reset_cached_block_groups(root->fs_info);
3862         memset(&wc, 0, sizeof(wc));
3863         cache_tree_init(&wc.shared);
3864         btrfs_init_path(&path);
3865
3866 again:
3867         key.offset = 0;
3868         key.objectid = 0;
3869         key.type = BTRFS_ROOT_ITEM_KEY;
3870         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3871         if (ret < 0) {
3872                 err = 1;
3873                 goto out;
3874         }
3875         tree_node = tree_root->node;
3876         while (1) {
3877                 if (tree_node != tree_root->node) {
3878                         free_root_recs_tree(root_cache);
3879                         btrfs_release_path(&path);
3880                         goto again;
3881                 }
3882                 leaf = path.nodes[0];
3883                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3884                         ret = btrfs_next_leaf(tree_root, &path);
3885                         if (ret) {
3886                                 if (ret < 0)
3887                                         err = 1;
3888                                 break;
3889                         }
3890                         leaf = path.nodes[0];
3891                 }
3892                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3893                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3894                     fs_root_objectid(key.objectid)) {
3895                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3896                                 tmp_root = btrfs_read_fs_root_no_cache(
3897                                                 root->fs_info, &key);
3898                         } else {
3899                                 key.offset = (u64)-1;
3900                                 tmp_root = btrfs_read_fs_root(
3901                                                 root->fs_info, &key);
3902                         }
3903                         if (IS_ERR(tmp_root)) {
3904                                 err = 1;
3905                                 goto next;
3906                         }
3907                         ret = check_fs_root(tmp_root, root_cache, &wc);
3908                         if (ret == -EAGAIN) {
3909                                 free_root_recs_tree(root_cache);
3910                                 btrfs_release_path(&path);
3911                                 goto again;
3912                         }
3913                         if (ret)
3914                                 err = 1;
3915                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3916                                 btrfs_free_fs_root(tmp_root);
3917                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3918                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3919                         process_root_ref(leaf, path.slots[0], &key,
3920                                          root_cache);
3921                 }
3922 next:
3923                 path.slots[0]++;
3924         }
3925 out:
3926         btrfs_release_path(&path);
3927         if (err)
3928                 free_extent_cache_tree(&wc.shared);
3929         if (!cache_tree_empty(&wc.shared))
3930                 fprintf(stderr, "warning line %d\n", __LINE__);
3931
3932         task_stop(ctx.info);
3933
3934         return err;
3935 }
3936
3937 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3938 {
3939         struct list_head *cur = rec->backrefs.next;
3940         struct extent_backref *back;
3941         struct tree_backref *tback;
3942         struct data_backref *dback;
3943         u64 found = 0;
3944         int err = 0;
3945
3946         while(cur != &rec->backrefs) {
3947                 back = to_extent_backref(cur);
3948                 cur = cur->next;
3949                 if (!back->found_extent_tree) {
3950                         err = 1;
3951                         if (!print_errs)
3952                                 goto out;
3953                         if (back->is_data) {
3954                                 dback = to_data_backref(back);
3955                                 fprintf(stderr, "Backref %llu %s %llu"
3956                                         " owner %llu offset %llu num_refs %lu"
3957                                         " not found in extent tree\n",
3958                                         (unsigned long long)rec->start,
3959                                         back->full_backref ?
3960                                         "parent" : "root",
3961                                         back->full_backref ?
3962                                         (unsigned long long)dback->parent:
3963                                         (unsigned long long)dback->root,
3964                                         (unsigned long long)dback->owner,
3965                                         (unsigned long long)dback->offset,
3966                                         (unsigned long)dback->num_refs);
3967                         } else {
3968                                 tback = to_tree_backref(back);
3969                                 fprintf(stderr, "Backref %llu parent %llu"
3970                                         " root %llu not found in extent tree\n",
3971                                         (unsigned long long)rec->start,
3972                                         (unsigned long long)tback->parent,
3973                                         (unsigned long long)tback->root);
3974                         }
3975                 }
3976                 if (!back->is_data && !back->found_ref) {
3977                         err = 1;
3978                         if (!print_errs)
3979                                 goto out;
3980                         tback = to_tree_backref(back);
3981                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3982                                 (unsigned long long)rec->start,
3983                                 back->full_backref ? "parent" : "root",
3984                                 back->full_backref ?
3985                                 (unsigned long long)tback->parent :
3986                                 (unsigned long long)tback->root, back);
3987                 }
3988                 if (back->is_data) {
3989                         dback = to_data_backref(back);
3990                         if (dback->found_ref != dback->num_refs) {
3991                                 err = 1;
3992                                 if (!print_errs)
3993                                         goto out;
3994                                 fprintf(stderr, "Incorrect local backref count"
3995                                         " on %llu %s %llu owner %llu"
3996                                         " offset %llu found %u wanted %u back %p\n",
3997                                         (unsigned long long)rec->start,
3998                                         back->full_backref ?
3999                                         "parent" : "root",
4000                                         back->full_backref ?
4001                                         (unsigned long long)dback->parent:
4002                                         (unsigned long long)dback->root,
4003                                         (unsigned long long)dback->owner,
4004                                         (unsigned long long)dback->offset,
4005                                         dback->found_ref, dback->num_refs, back);
4006                         }
4007                         if (dback->disk_bytenr != rec->start) {
4008                                 err = 1;
4009                                 if (!print_errs)
4010                                         goto out;
4011                                 fprintf(stderr, "Backref disk bytenr does not"
4012                                         " match extent record, bytenr=%llu, "
4013                                         "ref bytenr=%llu\n",
4014                                         (unsigned long long)rec->start,
4015                                         (unsigned long long)dback->disk_bytenr);
4016                         }
4017
4018                         if (dback->bytes != rec->nr) {
4019                                 err = 1;
4020                                 if (!print_errs)
4021                                         goto out;
4022                                 fprintf(stderr, "Backref bytes do not match "
4023                                         "extent backref, bytenr=%llu, ref "
4024                                         "bytes=%llu, backref bytes=%llu\n",
4025                                         (unsigned long long)rec->start,
4026                                         (unsigned long long)rec->nr,
4027                                         (unsigned long long)dback->bytes);
4028                         }
4029                 }
4030                 if (!back->is_data) {
4031                         found += 1;
4032                 } else {
4033                         dback = to_data_backref(back);
4034                         found += dback->found_ref;
4035                 }
4036         }
4037         if (found != rec->refs) {
4038                 err = 1;
4039                 if (!print_errs)
4040                         goto out;
4041                 fprintf(stderr, "Incorrect global backref count "
4042                         "on %llu found %llu wanted %llu\n",
4043                         (unsigned long long)rec->start,
4044                         (unsigned long long)found,
4045                         (unsigned long long)rec->refs);
4046         }
4047 out:
4048         return err;
4049 }
4050
4051 static int free_all_extent_backrefs(struct extent_record *rec)
4052 {
4053         struct extent_backref *back;
4054         struct list_head *cur;
4055         while (!list_empty(&rec->backrefs)) {
4056                 cur = rec->backrefs.next;
4057                 back = to_extent_backref(cur);
4058                 list_del(cur);
4059                 free(back);
4060         }
4061         return 0;
4062 }
4063
4064 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4065                                      struct cache_tree *extent_cache)
4066 {
4067         struct cache_extent *cache;
4068         struct extent_record *rec;
4069
4070         while (1) {
4071                 cache = first_cache_extent(extent_cache);
4072                 if (!cache)
4073                         break;
4074                 rec = container_of(cache, struct extent_record, cache);
4075                 remove_cache_extent(extent_cache, cache);
4076                 free_all_extent_backrefs(rec);
4077                 free(rec);
4078         }
4079 }
4080
4081 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4082                                  struct extent_record *rec)
4083 {
4084         if (rec->content_checked && rec->owner_ref_checked &&
4085             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4086             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4087             !rec->bad_full_backref && !rec->crossing_stripes &&
4088             !rec->wrong_chunk_type) {
4089                 remove_cache_extent(extent_cache, &rec->cache);
4090                 free_all_extent_backrefs(rec);
4091                 list_del_init(&rec->list);
4092                 free(rec);
4093         }
4094         return 0;
4095 }
4096
4097 static int check_owner_ref(struct btrfs_root *root,
4098                             struct extent_record *rec,
4099                             struct extent_buffer *buf)
4100 {
4101         struct extent_backref *node;
4102         struct tree_backref *back;
4103         struct btrfs_root *ref_root;
4104         struct btrfs_key key;
4105         struct btrfs_path path;
4106         struct extent_buffer *parent;
4107         int level;
4108         int found = 0;
4109         int ret;
4110
4111         list_for_each_entry(node, &rec->backrefs, list) {
4112                 if (node->is_data)
4113                         continue;
4114                 if (!node->found_ref)
4115                         continue;
4116                 if (node->full_backref)
4117                         continue;
4118                 back = to_tree_backref(node);
4119                 if (btrfs_header_owner(buf) == back->root)
4120                         return 0;
4121         }
4122         BUG_ON(rec->is_root);
4123
4124         /* try to find the block by search corresponding fs tree */
4125         key.objectid = btrfs_header_owner(buf);
4126         key.type = BTRFS_ROOT_ITEM_KEY;
4127         key.offset = (u64)-1;
4128
4129         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4130         if (IS_ERR(ref_root))
4131                 return 1;
4132
4133         level = btrfs_header_level(buf);
4134         if (level == 0)
4135                 btrfs_item_key_to_cpu(buf, &key, 0);
4136         else
4137                 btrfs_node_key_to_cpu(buf, &key, 0);
4138
4139         btrfs_init_path(&path);
4140         path.lowest_level = level + 1;
4141         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4142         if (ret < 0)
4143                 return 0;
4144
4145         parent = path.nodes[level + 1];
4146         if (parent && buf->start == btrfs_node_blockptr(parent,
4147                                                         path.slots[level + 1]))
4148                 found = 1;
4149
4150         btrfs_release_path(&path);
4151         return found ? 0 : 1;
4152 }
4153
4154 static int is_extent_tree_record(struct extent_record *rec)
4155 {
4156         struct list_head *cur = rec->backrefs.next;
4157         struct extent_backref *node;
4158         struct tree_backref *back;
4159         int is_extent = 0;
4160
4161         while(cur != &rec->backrefs) {
4162                 node = to_extent_backref(cur);
4163                 cur = cur->next;
4164                 if (node->is_data)
4165                         return 0;
4166                 back = to_tree_backref(node);
4167                 if (node->full_backref)
4168                         return 0;
4169                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4170                         is_extent = 1;
4171         }
4172         return is_extent;
4173 }
4174
4175
4176 static int record_bad_block_io(struct btrfs_fs_info *info,
4177                                struct cache_tree *extent_cache,
4178                                u64 start, u64 len)
4179 {
4180         struct extent_record *rec;
4181         struct cache_extent *cache;
4182         struct btrfs_key key;
4183
4184         cache = lookup_cache_extent(extent_cache, start, len);
4185         if (!cache)
4186                 return 0;
4187
4188         rec = container_of(cache, struct extent_record, cache);
4189         if (!is_extent_tree_record(rec))
4190                 return 0;
4191
4192         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4193         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4194 }
4195
4196 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4197                        struct extent_buffer *buf, int slot)
4198 {
4199         if (btrfs_header_level(buf)) {
4200                 struct btrfs_key_ptr ptr1, ptr2;
4201
4202                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4203                                    sizeof(struct btrfs_key_ptr));
4204                 read_extent_buffer(buf, &ptr2,
4205                                    btrfs_node_key_ptr_offset(slot + 1),
4206                                    sizeof(struct btrfs_key_ptr));
4207                 write_extent_buffer(buf, &ptr1,
4208                                     btrfs_node_key_ptr_offset(slot + 1),
4209                                     sizeof(struct btrfs_key_ptr));
4210                 write_extent_buffer(buf, &ptr2,
4211                                     btrfs_node_key_ptr_offset(slot),
4212                                     sizeof(struct btrfs_key_ptr));
4213                 if (slot == 0) {
4214                         struct btrfs_disk_key key;
4215                         btrfs_node_key(buf, &key, 0);
4216                         btrfs_fixup_low_keys(root, path, &key,
4217                                              btrfs_header_level(buf) + 1);
4218                 }
4219         } else {
4220                 struct btrfs_item *item1, *item2;
4221                 struct btrfs_key k1, k2;
4222                 char *item1_data, *item2_data;
4223                 u32 item1_offset, item2_offset, item1_size, item2_size;
4224
4225                 item1 = btrfs_item_nr(slot);
4226                 item2 = btrfs_item_nr(slot + 1);
4227                 btrfs_item_key_to_cpu(buf, &k1, slot);
4228                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4229                 item1_offset = btrfs_item_offset(buf, item1);
4230                 item2_offset = btrfs_item_offset(buf, item2);
4231                 item1_size = btrfs_item_size(buf, item1);
4232                 item2_size = btrfs_item_size(buf, item2);
4233
4234                 item1_data = malloc(item1_size);
4235                 if (!item1_data)
4236                         return -ENOMEM;
4237                 item2_data = malloc(item2_size);
4238                 if (!item2_data) {
4239                         free(item1_data);
4240                         return -ENOMEM;
4241                 }
4242
4243                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4244                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4245
4246                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4247                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4248                 free(item1_data);
4249                 free(item2_data);
4250
4251                 btrfs_set_item_offset(buf, item1, item2_offset);
4252                 btrfs_set_item_offset(buf, item2, item1_offset);
4253                 btrfs_set_item_size(buf, item1, item2_size);
4254                 btrfs_set_item_size(buf, item2, item1_size);
4255
4256                 path->slots[0] = slot;
4257                 btrfs_set_item_key_unsafe(root, path, &k2);
4258                 path->slots[0] = slot + 1;
4259                 btrfs_set_item_key_unsafe(root, path, &k1);
4260         }
4261         return 0;
4262 }
4263
4264 static int fix_key_order(struct btrfs_trans_handle *trans,
4265                          struct btrfs_root *root,
4266                          struct btrfs_path *path)
4267 {
4268         struct extent_buffer *buf;
4269         struct btrfs_key k1, k2;
4270         int i;
4271         int level = path->lowest_level;
4272         int ret = -EIO;
4273
4274         buf = path->nodes[level];
4275         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4276                 if (level) {
4277                         btrfs_node_key_to_cpu(buf, &k1, i);
4278                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4279                 } else {
4280                         btrfs_item_key_to_cpu(buf, &k1, i);
4281                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4282                 }
4283                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4284                         continue;
4285                 ret = swap_values(root, path, buf, i);
4286                 if (ret)
4287                         break;
4288                 btrfs_mark_buffer_dirty(buf);
4289                 i = 0;
4290         }
4291         return ret;
4292 }
4293
4294 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4295                              struct btrfs_root *root,
4296                              struct btrfs_path *path,
4297                              struct extent_buffer *buf, int slot)
4298 {
4299         struct btrfs_key key;
4300         int nritems = btrfs_header_nritems(buf);
4301
4302         btrfs_item_key_to_cpu(buf, &key, slot);
4303
4304         /* These are all the keys we can deal with missing. */
4305         if (key.type != BTRFS_DIR_INDEX_KEY &&
4306             key.type != BTRFS_EXTENT_ITEM_KEY &&
4307             key.type != BTRFS_METADATA_ITEM_KEY &&
4308             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4309             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4310                 return -1;
4311
4312         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4313                (unsigned long long)key.objectid, key.type,
4314                (unsigned long long)key.offset, slot, buf->start);
4315         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4316                               btrfs_item_nr_offset(slot + 1),
4317                               sizeof(struct btrfs_item) *
4318                               (nritems - slot - 1));
4319         btrfs_set_header_nritems(buf, nritems - 1);
4320         if (slot == 0) {
4321                 struct btrfs_disk_key disk_key;
4322
4323                 btrfs_item_key(buf, &disk_key, 0);
4324                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4325         }
4326         btrfs_mark_buffer_dirty(buf);
4327         return 0;
4328 }
4329
4330 static int fix_item_offset(struct btrfs_trans_handle *trans,
4331                            struct btrfs_root *root,
4332                            struct btrfs_path *path)
4333 {
4334         struct extent_buffer *buf;
4335         int i;
4336         int ret = 0;
4337
4338         /* We should only get this for leaves */
4339         BUG_ON(path->lowest_level);
4340         buf = path->nodes[0];
4341 again:
4342         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4343                 unsigned int shift = 0, offset;
4344
4345                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4346                     BTRFS_LEAF_DATA_SIZE(root)) {
4347                         if (btrfs_item_end_nr(buf, i) >
4348                             BTRFS_LEAF_DATA_SIZE(root)) {
4349                                 ret = delete_bogus_item(trans, root, path,
4350                                                         buf, i);
4351                                 if (!ret)
4352                                         goto again;
4353                                 fprintf(stderr, "item is off the end of the "
4354                                         "leaf, can't fix\n");
4355                                 ret = -EIO;
4356                                 break;
4357                         }
4358                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4359                                 btrfs_item_end_nr(buf, i);
4360                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4361                            btrfs_item_offset_nr(buf, i - 1)) {
4362                         if (btrfs_item_end_nr(buf, i) >
4363                             btrfs_item_offset_nr(buf, i - 1)) {
4364                                 ret = delete_bogus_item(trans, root, path,
4365                                                         buf, i);
4366                                 if (!ret)
4367                                         goto again;
4368                                 fprintf(stderr, "items overlap, can't fix\n");
4369                                 ret = -EIO;
4370                                 break;
4371                         }
4372                         shift = btrfs_item_offset_nr(buf, i - 1) -
4373                                 btrfs_item_end_nr(buf, i);
4374                 }
4375                 if (!shift)
4376                         continue;
4377
4378                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4379                        i, shift, (unsigned long long)buf->start);
4380                 offset = btrfs_item_offset_nr(buf, i);
4381                 memmove_extent_buffer(buf,
4382                                       btrfs_leaf_data(buf) + offset + shift,
4383                                       btrfs_leaf_data(buf) + offset,
4384                                       btrfs_item_size_nr(buf, i));
4385                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4386                                       offset + shift);
4387                 btrfs_mark_buffer_dirty(buf);
4388         }
4389
4390         /*
4391          * We may have moved things, in which case we want to exit so we don't
4392          * write those changes out.  Once we have proper abort functionality in
4393          * progs this can be changed to something nicer.
4394          */
4395         BUG_ON(ret);
4396         return ret;
4397 }
4398
4399 /*
4400  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4401  * then just return -EIO.
4402  */
4403 static int try_to_fix_bad_block(struct btrfs_root *root,
4404                                 struct extent_buffer *buf,
4405                                 enum btrfs_tree_block_status status)
4406 {
4407         struct btrfs_trans_handle *trans;
4408         struct ulist *roots;
4409         struct ulist_node *node;
4410         struct btrfs_root *search_root;
4411         struct btrfs_path *path;
4412         struct ulist_iterator iter;
4413         struct btrfs_key root_key, key;
4414         int ret;
4415
4416         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4417             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4418                 return -EIO;
4419
4420         path = btrfs_alloc_path();
4421         if (!path)
4422                 return -EIO;
4423
4424         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4425                                    0, &roots);
4426         if (ret) {
4427                 btrfs_free_path(path);
4428                 return -EIO;
4429         }
4430
4431         ULIST_ITER_INIT(&iter);
4432         while ((node = ulist_next(roots, &iter))) {
4433                 root_key.objectid = node->val;
4434                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4435                 root_key.offset = (u64)-1;
4436
4437                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4438                 if (IS_ERR(root)) {
4439                         ret = -EIO;
4440                         break;
4441                 }
4442
4443
4444                 trans = btrfs_start_transaction(search_root, 0);
4445                 if (IS_ERR(trans)) {
4446                         ret = PTR_ERR(trans);
4447                         break;
4448                 }
4449
4450                 path->lowest_level = btrfs_header_level(buf);
4451                 path->skip_check_block = 1;
4452                 if (path->lowest_level)
4453                         btrfs_node_key_to_cpu(buf, &key, 0);
4454                 else
4455                         btrfs_item_key_to_cpu(buf, &key, 0);
4456                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4457                 if (ret) {
4458                         ret = -EIO;
4459                         btrfs_commit_transaction(trans, search_root);
4460                         break;
4461                 }
4462                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4463                         ret = fix_key_order(trans, search_root, path);
4464                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4465                         ret = fix_item_offset(trans, search_root, path);
4466                 if (ret) {
4467                         btrfs_commit_transaction(trans, search_root);
4468                         break;
4469                 }
4470                 btrfs_release_path(path);
4471                 btrfs_commit_transaction(trans, search_root);
4472         }
4473         ulist_free(roots);
4474         btrfs_free_path(path);
4475         return ret;
4476 }
4477
4478 static int check_block(struct btrfs_root *root,
4479                        struct cache_tree *extent_cache,
4480                        struct extent_buffer *buf, u64 flags)
4481 {
4482         struct extent_record *rec;
4483         struct cache_extent *cache;
4484         struct btrfs_key key;
4485         enum btrfs_tree_block_status status;
4486         int ret = 0;
4487         int level;
4488
4489         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4490         if (!cache)
4491                 return 1;
4492         rec = container_of(cache, struct extent_record, cache);
4493         rec->generation = btrfs_header_generation(buf);
4494
4495         level = btrfs_header_level(buf);
4496         if (btrfs_header_nritems(buf) > 0) {
4497
4498                 if (level == 0)
4499                         btrfs_item_key_to_cpu(buf, &key, 0);
4500                 else
4501                         btrfs_node_key_to_cpu(buf, &key, 0);
4502
4503                 rec->info_objectid = key.objectid;
4504         }
4505         rec->info_level = level;
4506
4507         if (btrfs_is_leaf(buf))
4508                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4509         else
4510                 status = btrfs_check_node(root, &rec->parent_key, buf);
4511
4512         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4513                 if (repair)
4514                         status = try_to_fix_bad_block(root, buf, status);
4515                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4516                         ret = -EIO;
4517                         fprintf(stderr, "bad block %llu\n",
4518                                 (unsigned long long)buf->start);
4519                 } else {
4520                         /*
4521                          * Signal to callers we need to start the scan over
4522                          * again since we'll have cowed blocks.
4523                          */
4524                         ret = -EAGAIN;
4525                 }
4526         } else {
4527                 rec->content_checked = 1;
4528                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4529                         rec->owner_ref_checked = 1;
4530                 else {
4531                         ret = check_owner_ref(root, rec, buf);
4532                         if (!ret)
4533                                 rec->owner_ref_checked = 1;
4534                 }
4535         }
4536         if (!ret)
4537                 maybe_free_extent_rec(extent_cache, rec);
4538         return ret;
4539 }
4540
4541
4542 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4543                                                 u64 parent, u64 root)
4544 {
4545         struct rb_node *node;
4546         struct tree_backref *back = NULL;
4547         struct tree_backref match = {
4548                 .node = {
4549                         .is_data = 0,
4550                 },
4551         };
4552
4553         if (parent) {
4554                 match.parent = parent;
4555                 match.node.full_backref = 1;
4556         } else {
4557                 match.root = root;
4558         }
4559
4560         node = rb_search(&rec->backref_tree, &match.node.node,
4561                          (rb_compare_keys)compare_extent_backref, NULL);
4562         if (node)
4563                 back = to_tree_backref(rb_node_to_extent_backref(node));
4564
4565         return back;
4566 }
4567
4568 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4569                                                 u64 parent, u64 root)
4570 {
4571         struct tree_backref *ref = malloc(sizeof(*ref));
4572
4573         if (!ref)
4574                 return NULL;
4575         memset(&ref->node, 0, sizeof(ref->node));
4576         if (parent > 0) {
4577                 ref->parent = parent;
4578                 ref->node.full_backref = 1;
4579         } else {
4580                 ref->root = root;
4581                 ref->node.full_backref = 0;
4582         }
4583         list_add_tail(&ref->node.list, &rec->backrefs);
4584         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4585
4586         return ref;
4587 }
4588
4589 static struct data_backref *find_data_backref(struct extent_record *rec,
4590                                                 u64 parent, u64 root,
4591                                                 u64 owner, u64 offset,
4592                                                 int found_ref,
4593                                                 u64 disk_bytenr, u64 bytes)
4594 {
4595         struct rb_node *node;
4596         struct data_backref *back = NULL;
4597         struct data_backref match = {
4598                 .node = {
4599                         .is_data = 1,
4600                 },
4601                 .owner = owner,
4602                 .offset = offset,
4603                 .bytes = bytes,
4604                 .found_ref = found_ref,
4605                 .disk_bytenr = disk_bytenr,
4606         };
4607
4608         if (parent) {
4609                 match.parent = parent;
4610                 match.node.full_backref = 1;
4611         } else {
4612                 match.root = root;
4613         }
4614
4615         node = rb_search(&rec->backref_tree, &match.node.node,
4616                          (rb_compare_keys)compare_extent_backref, NULL);
4617         if (node)
4618                 back = to_data_backref(rb_node_to_extent_backref(node));
4619
4620         return back;
4621 }
4622
4623 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4624                                                 u64 parent, u64 root,
4625                                                 u64 owner, u64 offset,
4626                                                 u64 max_size)
4627 {
4628         struct data_backref *ref = malloc(sizeof(*ref));
4629
4630         if (!ref)
4631                 return NULL;
4632         memset(&ref->node, 0, sizeof(ref->node));
4633         ref->node.is_data = 1;
4634
4635         if (parent > 0) {
4636                 ref->parent = parent;
4637                 ref->owner = 0;
4638                 ref->offset = 0;
4639                 ref->node.full_backref = 1;
4640         } else {
4641                 ref->root = root;
4642                 ref->owner = owner;
4643                 ref->offset = offset;
4644                 ref->node.full_backref = 0;
4645         }
4646         ref->bytes = max_size;
4647         ref->found_ref = 0;
4648         ref->num_refs = 0;
4649         list_add_tail(&ref->node.list, &rec->backrefs);
4650         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4651         if (max_size > rec->max_size)
4652                 rec->max_size = max_size;
4653         return ref;
4654 }
4655
4656 /* Check if the type of extent matches with its chunk */
4657 static void check_extent_type(struct extent_record *rec)
4658 {
4659         struct btrfs_block_group_cache *bg_cache;
4660
4661         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4662         if (!bg_cache)
4663                 return;
4664
4665         /* data extent, check chunk directly*/
4666         if (!rec->metadata) {
4667                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4668                         rec->wrong_chunk_type = 1;
4669                 return;
4670         }
4671
4672         /* metadata extent, check the obvious case first */
4673         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4674                                  BTRFS_BLOCK_GROUP_METADATA))) {
4675                 rec->wrong_chunk_type = 1;
4676                 return;
4677         }
4678
4679         /*
4680          * Check SYSTEM extent, as it's also marked as metadata, we can only
4681          * make sure it's a SYSTEM extent by its backref
4682          */
4683         if (!list_empty(&rec->backrefs)) {
4684                 struct extent_backref *node;
4685                 struct tree_backref *tback;
4686                 u64 bg_type;
4687
4688                 node = to_extent_backref(rec->backrefs.next);
4689                 if (node->is_data) {
4690                         /* tree block shouldn't have data backref */
4691                         rec->wrong_chunk_type = 1;
4692                         return;
4693                 }
4694                 tback = container_of(node, struct tree_backref, node);
4695
4696                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4697                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4698                 else
4699                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4700                 if (!(bg_cache->flags & bg_type))
4701                         rec->wrong_chunk_type = 1;
4702         }
4703 }
4704
4705 /*
4706  * Allocate a new extent record, fill default values from @tmpl and insert int
4707  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4708  * the cache, otherwise it fails.
4709  */
4710 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4711                 struct extent_record *tmpl)
4712 {
4713         struct extent_record *rec;
4714         int ret = 0;
4715
4716         rec = malloc(sizeof(*rec));
4717         if (!rec)
4718                 return -ENOMEM;
4719         rec->start = tmpl->start;
4720         rec->max_size = tmpl->max_size;
4721         rec->nr = max(tmpl->nr, tmpl->max_size);
4722         rec->found_rec = tmpl->found_rec;
4723         rec->content_checked = tmpl->content_checked;
4724         rec->owner_ref_checked = tmpl->owner_ref_checked;
4725         rec->num_duplicates = 0;
4726         rec->metadata = tmpl->metadata;
4727         rec->flag_block_full_backref = FLAG_UNSET;
4728         rec->bad_full_backref = 0;
4729         rec->crossing_stripes = 0;
4730         rec->wrong_chunk_type = 0;
4731         rec->is_root = tmpl->is_root;
4732         rec->refs = tmpl->refs;
4733         rec->extent_item_refs = tmpl->extent_item_refs;
4734         rec->parent_generation = tmpl->parent_generation;
4735         INIT_LIST_HEAD(&rec->backrefs);
4736         INIT_LIST_HEAD(&rec->dups);
4737         INIT_LIST_HEAD(&rec->list);
4738         rec->backref_tree = RB_ROOT;
4739         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4740         rec->cache.start = tmpl->start;
4741         rec->cache.size = tmpl->nr;
4742         ret = insert_cache_extent(extent_cache, &rec->cache);
4743         BUG_ON(ret);
4744         bytes_used += rec->nr;
4745
4746         if (tmpl->metadata)
4747                 rec->crossing_stripes = check_crossing_stripes(rec->start,
4748                                 global_info->tree_root->nodesize);
4749         check_extent_type(rec);
4750         return ret;
4751 }
4752
4753 /*
4754  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4755  * some are hints:
4756  * - refs              - if found, increase refs
4757  * - is_root           - if found, set
4758  * - content_checked   - if found, set
4759  * - owner_ref_checked - if found, set
4760  *
4761  * If not found, create a new one, initialize and insert.
4762  */
4763 static int add_extent_rec(struct cache_tree *extent_cache,
4764                 struct extent_record *tmpl)
4765 {
4766         struct extent_record *rec;
4767         struct cache_extent *cache;
4768         int ret = 0;
4769         int dup = 0;
4770
4771         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4772         if (cache) {
4773                 rec = container_of(cache, struct extent_record, cache);
4774                 if (tmpl->refs)
4775                         rec->refs++;
4776                 if (rec->nr == 1)
4777                         rec->nr = max(tmpl->nr, tmpl->max_size);
4778
4779                 /*
4780                  * We need to make sure to reset nr to whatever the extent
4781                  * record says was the real size, this way we can compare it to
4782                  * the backrefs.
4783                  */
4784                 if (tmpl->found_rec) {
4785                         if (tmpl->start != rec->start || rec->found_rec) {
4786                                 struct extent_record *tmp;
4787
4788                                 dup = 1;
4789                                 if (list_empty(&rec->list))
4790                                         list_add_tail(&rec->list,
4791                                                       &duplicate_extents);
4792
4793                                 /*
4794                                  * We have to do this song and dance in case we
4795                                  * find an extent record that falls inside of
4796                                  * our current extent record but does not have
4797                                  * the same objectid.
4798                                  */
4799                                 tmp = malloc(sizeof(*tmp));
4800                                 if (!tmp)
4801                                         return -ENOMEM;
4802                                 tmp->start = tmpl->start;
4803                                 tmp->max_size = tmpl->max_size;
4804                                 tmp->nr = tmpl->nr;
4805                                 tmp->found_rec = 1;
4806                                 tmp->metadata = tmpl->metadata;
4807                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4808                                 INIT_LIST_HEAD(&tmp->list);
4809                                 list_add_tail(&tmp->list, &rec->dups);
4810                                 rec->num_duplicates++;
4811                         } else {
4812                                 rec->nr = tmpl->nr;
4813                                 rec->found_rec = 1;
4814                         }
4815                 }
4816
4817                 if (tmpl->extent_item_refs && !dup) {
4818                         if (rec->extent_item_refs) {
4819                                 fprintf(stderr, "block %llu rec "
4820                                         "extent_item_refs %llu, passed %llu\n",
4821                                         (unsigned long long)tmpl->start,
4822                                         (unsigned long long)
4823                                                         rec->extent_item_refs,
4824                                         (unsigned long long)tmpl->extent_item_refs);
4825                         }
4826                         rec->extent_item_refs = tmpl->extent_item_refs;
4827                 }
4828                 if (tmpl->is_root)
4829                         rec->is_root = 1;
4830                 if (tmpl->content_checked)
4831                         rec->content_checked = 1;
4832                 if (tmpl->owner_ref_checked)
4833                         rec->owner_ref_checked = 1;
4834                 memcpy(&rec->parent_key, &tmpl->parent_key,
4835                                 sizeof(tmpl->parent_key));
4836                 if (tmpl->parent_generation)
4837                         rec->parent_generation = tmpl->parent_generation;
4838                 if (rec->max_size < tmpl->max_size)
4839                         rec->max_size = tmpl->max_size;
4840
4841                 /*
4842                  * A metadata extent can't cross stripe_len boundary, otherwise
4843                  * kernel scrub won't be able to handle it.
4844                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4845                  * it.
4846                  */
4847                 if (tmpl->metadata)
4848                         rec->crossing_stripes = check_crossing_stripes(
4849                                 rec->start, global_info->tree_root->nodesize);
4850                 check_extent_type(rec);
4851                 maybe_free_extent_rec(extent_cache, rec);
4852                 return ret;
4853         }
4854
4855         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4856
4857         return ret;
4858 }
4859
4860 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4861                             u64 parent, u64 root, int found_ref)
4862 {
4863         struct extent_record *rec;
4864         struct tree_backref *back;
4865         struct cache_extent *cache;
4866
4867         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4868         if (!cache) {
4869                 struct extent_record tmpl;
4870
4871                 memset(&tmpl, 0, sizeof(tmpl));
4872                 tmpl.start = bytenr;
4873                 tmpl.nr = 1;
4874                 tmpl.metadata = 1;
4875
4876                 add_extent_rec_nolookup(extent_cache, &tmpl);
4877
4878                 /* really a bug in cache_extent implement now */
4879                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4880                 if (!cache)
4881                         return -ENOENT;
4882         }
4883
4884         rec = container_of(cache, struct extent_record, cache);
4885         if (rec->start != bytenr) {
4886                 /*
4887                  * Several cause, from unaligned bytenr to over lapping extents
4888                  */
4889                 return -EEXIST;
4890         }
4891
4892         back = find_tree_backref(rec, parent, root);
4893         if (!back) {
4894                 back = alloc_tree_backref(rec, parent, root);
4895                 if (!back)
4896                         return -ENOMEM;
4897         }
4898
4899         if (found_ref) {
4900                 if (back->node.found_ref) {
4901                         fprintf(stderr, "Extent back ref already exists "
4902                                 "for %llu parent %llu root %llu \n",
4903                                 (unsigned long long)bytenr,
4904                                 (unsigned long long)parent,
4905                                 (unsigned long long)root);
4906                 }
4907                 back->node.found_ref = 1;
4908         } else {
4909                 if (back->node.found_extent_tree) {
4910                         fprintf(stderr, "Extent back ref already exists "
4911                                 "for %llu parent %llu root %llu \n",
4912                                 (unsigned long long)bytenr,
4913                                 (unsigned long long)parent,
4914                                 (unsigned long long)root);
4915                 }
4916                 back->node.found_extent_tree = 1;
4917         }
4918         check_extent_type(rec);
4919         maybe_free_extent_rec(extent_cache, rec);
4920         return 0;
4921 }
4922
4923 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4924                             u64 parent, u64 root, u64 owner, u64 offset,
4925                             u32 num_refs, int found_ref, u64 max_size)
4926 {
4927         struct extent_record *rec;
4928         struct data_backref *back;
4929         struct cache_extent *cache;
4930
4931         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4932         if (!cache) {
4933                 struct extent_record tmpl;
4934
4935                 memset(&tmpl, 0, sizeof(tmpl));
4936                 tmpl.start = bytenr;
4937                 tmpl.nr = 1;
4938                 tmpl.max_size = max_size;
4939
4940                 add_extent_rec_nolookup(extent_cache, &tmpl);
4941
4942                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4943                 if (!cache)
4944                         abort();
4945         }
4946
4947         rec = container_of(cache, struct extent_record, cache);
4948         if (rec->max_size < max_size)
4949                 rec->max_size = max_size;
4950
4951         /*
4952          * If found_ref is set then max_size is the real size and must match the
4953          * existing refs.  So if we have already found a ref then we need to
4954          * make sure that this ref matches the existing one, otherwise we need
4955          * to add a new backref so we can notice that the backrefs don't match
4956          * and we need to figure out who is telling the truth.  This is to
4957          * account for that awful fsync bug I introduced where we'd end up with
4958          * a btrfs_file_extent_item that would have its length include multiple
4959          * prealloc extents or point inside of a prealloc extent.
4960          */
4961         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4962                                  bytenr, max_size);
4963         if (!back) {
4964                 back = alloc_data_backref(rec, parent, root, owner, offset,
4965                                           max_size);
4966                 BUG_ON(!back);
4967         }
4968
4969         if (found_ref) {
4970                 BUG_ON(num_refs != 1);
4971                 if (back->node.found_ref)
4972                         BUG_ON(back->bytes != max_size);
4973                 back->node.found_ref = 1;
4974                 back->found_ref += 1;
4975                 back->bytes = max_size;
4976                 back->disk_bytenr = bytenr;
4977                 rec->refs += 1;
4978                 rec->content_checked = 1;
4979                 rec->owner_ref_checked = 1;
4980         } else {
4981                 if (back->node.found_extent_tree) {
4982                         fprintf(stderr, "Extent back ref already exists "
4983                                 "for %llu parent %llu root %llu "
4984                                 "owner %llu offset %llu num_refs %lu\n",
4985                                 (unsigned long long)bytenr,
4986                                 (unsigned long long)parent,
4987                                 (unsigned long long)root,
4988                                 (unsigned long long)owner,
4989                                 (unsigned long long)offset,
4990                                 (unsigned long)num_refs);
4991                 }
4992                 back->num_refs = num_refs;
4993                 back->node.found_extent_tree = 1;
4994         }
4995         maybe_free_extent_rec(extent_cache, rec);
4996         return 0;
4997 }
4998
4999 static int add_pending(struct cache_tree *pending,
5000                        struct cache_tree *seen, u64 bytenr, u32 size)
5001 {
5002         int ret;
5003         ret = add_cache_extent(seen, bytenr, size);
5004         if (ret)
5005                 return ret;
5006         add_cache_extent(pending, bytenr, size);
5007         return 0;
5008 }
5009
5010 static int pick_next_pending(struct cache_tree *pending,
5011                         struct cache_tree *reada,
5012                         struct cache_tree *nodes,
5013                         u64 last, struct block_info *bits, int bits_nr,
5014                         int *reada_bits)
5015 {
5016         unsigned long node_start = last;
5017         struct cache_extent *cache;
5018         int ret;
5019
5020         cache = search_cache_extent(reada, 0);
5021         if (cache) {
5022                 bits[0].start = cache->start;
5023                 bits[0].size = cache->size;
5024                 *reada_bits = 1;
5025                 return 1;
5026         }
5027         *reada_bits = 0;
5028         if (node_start > 32768)
5029                 node_start -= 32768;
5030
5031         cache = search_cache_extent(nodes, node_start);
5032         if (!cache)
5033                 cache = search_cache_extent(nodes, 0);
5034
5035         if (!cache) {
5036                  cache = search_cache_extent(pending, 0);
5037                  if (!cache)
5038                          return 0;
5039                  ret = 0;
5040                  do {
5041                          bits[ret].start = cache->start;
5042                          bits[ret].size = cache->size;
5043                          cache = next_cache_extent(cache);
5044                          ret++;
5045                  } while (cache && ret < bits_nr);
5046                  return ret;
5047         }
5048
5049         ret = 0;
5050         do {
5051                 bits[ret].start = cache->start;
5052                 bits[ret].size = cache->size;
5053                 cache = next_cache_extent(cache);
5054                 ret++;
5055         } while (cache && ret < bits_nr);
5056
5057         if (bits_nr - ret > 8) {
5058                 u64 lookup = bits[0].start + bits[0].size;
5059                 struct cache_extent *next;
5060                 next = search_cache_extent(pending, lookup);
5061                 while(next) {
5062                         if (next->start - lookup > 32768)
5063                                 break;
5064                         bits[ret].start = next->start;
5065                         bits[ret].size = next->size;
5066                         lookup = next->start + next->size;
5067                         ret++;
5068                         if (ret == bits_nr)
5069                                 break;
5070                         next = next_cache_extent(next);
5071                         if (!next)
5072                                 break;
5073                 }
5074         }
5075         return ret;
5076 }
5077
5078 static void free_chunk_record(struct cache_extent *cache)
5079 {
5080         struct chunk_record *rec;
5081
5082         rec = container_of(cache, struct chunk_record, cache);
5083         list_del_init(&rec->list);
5084         list_del_init(&rec->dextents);
5085         free(rec);
5086 }
5087
5088 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5089 {
5090         cache_tree_free_extents(chunk_cache, free_chunk_record);
5091 }
5092
5093 static void free_device_record(struct rb_node *node)
5094 {
5095         struct device_record *rec;
5096
5097         rec = container_of(node, struct device_record, node);
5098         free(rec);
5099 }
5100
5101 FREE_RB_BASED_TREE(device_cache, free_device_record);
5102
5103 int insert_block_group_record(struct block_group_tree *tree,
5104                               struct block_group_record *bg_rec)
5105 {
5106         int ret;
5107
5108         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5109         if (ret)
5110                 return ret;
5111
5112         list_add_tail(&bg_rec->list, &tree->block_groups);
5113         return 0;
5114 }
5115
5116 static void free_block_group_record(struct cache_extent *cache)
5117 {
5118         struct block_group_record *rec;
5119
5120         rec = container_of(cache, struct block_group_record, cache);
5121         list_del_init(&rec->list);
5122         free(rec);
5123 }
5124
5125 void free_block_group_tree(struct block_group_tree *tree)
5126 {
5127         cache_tree_free_extents(&tree->tree, free_block_group_record);
5128 }
5129
5130 int insert_device_extent_record(struct device_extent_tree *tree,
5131                                 struct device_extent_record *de_rec)
5132 {
5133         int ret;
5134
5135         /*
5136          * Device extent is a bit different from the other extents, because
5137          * the extents which belong to the different devices may have the
5138          * same start and size, so we need use the special extent cache
5139          * search/insert functions.
5140          */
5141         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5142         if (ret)
5143                 return ret;
5144
5145         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5146         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5147         return 0;
5148 }
5149
5150 static void free_device_extent_record(struct cache_extent *cache)
5151 {
5152         struct device_extent_record *rec;
5153
5154         rec = container_of(cache, struct device_extent_record, cache);
5155         if (!list_empty(&rec->chunk_list))
5156                 list_del_init(&rec->chunk_list);
5157         if (!list_empty(&rec->device_list))
5158                 list_del_init(&rec->device_list);
5159         free(rec);
5160 }
5161
5162 void free_device_extent_tree(struct device_extent_tree *tree)
5163 {
5164         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5165 }
5166
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref.
 *
 * A ref objectid below BTRFS_FIRST_FREE_OBJECTID is recorded as a tree
 * backref, anything else as a data backref carrying the v0 ref count. In
 * both cases key.objectid is the extent start and key.offset the parent.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
}
#endif
5187
5188 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5189                                             struct btrfs_key *key,
5190                                             int slot)
5191 {
5192         struct btrfs_chunk *ptr;
5193         struct chunk_record *rec;
5194         int num_stripes, i;
5195
5196         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5197         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5198
5199         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5200         if (!rec) {
5201                 fprintf(stderr, "memory allocation failed\n");
5202                 exit(-1);
5203         }
5204
5205         INIT_LIST_HEAD(&rec->list);
5206         INIT_LIST_HEAD(&rec->dextents);
5207         rec->bg_rec = NULL;
5208
5209         rec->cache.start = key->offset;
5210         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5211
5212         rec->generation = btrfs_header_generation(leaf);
5213
5214         rec->objectid = key->objectid;
5215         rec->type = key->type;
5216         rec->offset = key->offset;
5217
5218         rec->length = rec->cache.size;
5219         rec->owner = btrfs_chunk_owner(leaf, ptr);
5220         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5221         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5222         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5223         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5224         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5225         rec->num_stripes = num_stripes;
5226         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5227
5228         for (i = 0; i < rec->num_stripes; ++i) {
5229                 rec->stripes[i].devid =
5230                         btrfs_stripe_devid_nr(leaf, ptr, i);
5231                 rec->stripes[i].offset =
5232                         btrfs_stripe_offset_nr(leaf, ptr, i);
5233                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5234                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5235                                 BTRFS_UUID_SIZE);
5236         }
5237
5238         return rec;
5239 }
5240
5241 static int process_chunk_item(struct cache_tree *chunk_cache,
5242                               struct btrfs_key *key, struct extent_buffer *eb,
5243                               int slot)
5244 {
5245         struct chunk_record *rec;
5246         struct btrfs_chunk *chunk;
5247         int ret = 0;
5248
5249         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5250         /*
5251          * Do extra check for this chunk item,
5252          *
5253          * It's still possible one can craft a leaf with CHUNK_ITEM, with
5254          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5255          * and owner<->key_type check.
5256          */
5257         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5258                                       key->offset);
5259         if (ret < 0) {
5260                 error("chunk(%llu, %llu) is not valid, ignore it",
5261                       key->offset, btrfs_chunk_length(eb, chunk));
5262                 return 0;
5263         }
5264         rec = btrfs_new_chunk_record(eb, key, slot);
5265         ret = insert_cache_extent(chunk_cache, &rec->cache);
5266         if (ret) {
5267                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5268                         rec->offset, rec->length);
5269                 free(rec);
5270         }
5271
5272         return ret;
5273 }
5274
5275 static int process_device_item(struct rb_root *dev_cache,
5276                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5277 {
5278         struct btrfs_dev_item *ptr;
5279         struct device_record *rec;
5280         int ret = 0;
5281
5282         ptr = btrfs_item_ptr(eb,
5283                 slot, struct btrfs_dev_item);
5284
5285         rec = malloc(sizeof(*rec));
5286         if (!rec) {
5287                 fprintf(stderr, "memory allocation failed\n");
5288                 return -ENOMEM;
5289         }
5290
5291         rec->devid = key->offset;
5292         rec->generation = btrfs_header_generation(eb);
5293
5294         rec->objectid = key->objectid;
5295         rec->type = key->type;
5296         rec->offset = key->offset;
5297
5298         rec->devid = btrfs_device_id(eb, ptr);
5299         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5300         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5301
5302         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5303         if (ret) {
5304                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5305                 free(rec);
5306         }
5307
5308         return ret;
5309 }
5310
5311 struct block_group_record *
5312 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5313                              int slot)
5314 {
5315         struct btrfs_block_group_item *ptr;
5316         struct block_group_record *rec;
5317
5318         rec = calloc(1, sizeof(*rec));
5319         if (!rec) {
5320                 fprintf(stderr, "memory allocation failed\n");
5321                 exit(-1);
5322         }
5323
5324         rec->cache.start = key->objectid;
5325         rec->cache.size = key->offset;
5326
5327         rec->generation = btrfs_header_generation(leaf);
5328
5329         rec->objectid = key->objectid;
5330         rec->type = key->type;
5331         rec->offset = key->offset;
5332
5333         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5334         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5335
5336         INIT_LIST_HEAD(&rec->list);
5337
5338         return rec;
5339 }
5340
5341 static int process_block_group_item(struct block_group_tree *block_group_cache,
5342                                     struct btrfs_key *key,
5343                                     struct extent_buffer *eb, int slot)
5344 {
5345         struct block_group_record *rec;
5346         int ret = 0;
5347
5348         rec = btrfs_new_block_group_record(eb, key, slot);
5349         ret = insert_block_group_record(block_group_cache, rec);
5350         if (ret) {
5351                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5352                         rec->objectid, rec->offset);
5353                 free(rec);
5354         }
5355
5356         return ret;
5357 }
5358
5359 struct device_extent_record *
5360 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5361                                struct btrfs_key *key, int slot)
5362 {
5363         struct device_extent_record *rec;
5364         struct btrfs_dev_extent *ptr;
5365
5366         rec = calloc(1, sizeof(*rec));
5367         if (!rec) {
5368                 fprintf(stderr, "memory allocation failed\n");
5369                 exit(-1);
5370         }
5371
5372         rec->cache.objectid = key->objectid;
5373         rec->cache.start = key->offset;
5374
5375         rec->generation = btrfs_header_generation(leaf);
5376
5377         rec->objectid = key->objectid;
5378         rec->type = key->type;
5379         rec->offset = key->offset;
5380
5381         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5382         rec->chunk_objecteid =
5383                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5384         rec->chunk_offset =
5385                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5386         rec->length = btrfs_dev_extent_length(leaf, ptr);
5387         rec->cache.size = rec->length;
5388
5389         INIT_LIST_HEAD(&rec->chunk_list);
5390         INIT_LIST_HEAD(&rec->device_list);
5391
5392         return rec;
5393 }
5394
5395 static int
5396 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5397                            struct btrfs_key *key, struct extent_buffer *eb,
5398                            int slot)
5399 {
5400         struct device_extent_record *rec;
5401         int ret;
5402
5403         rec = btrfs_new_device_extent_record(eb, key, slot);
5404         ret = insert_device_extent_record(dev_extent_cache, rec);
5405         if (ret) {
5406                 fprintf(stderr,
5407                         "Device extent[%llu, %llu, %llu] existed.\n",
5408                         rec->objectid, rec->offset, rec->length);
5409                 free(rec);
5410         }
5411
5412         return ret;
5413 }
5414
5415 static int process_extent_item(struct btrfs_root *root,
5416                                struct cache_tree *extent_cache,
5417                                struct extent_buffer *eb, int slot)
5418 {
5419         struct btrfs_extent_item *ei;
5420         struct btrfs_extent_inline_ref *iref;
5421         struct btrfs_extent_data_ref *dref;
5422         struct btrfs_shared_data_ref *sref;
5423         struct btrfs_key key;
5424         struct extent_record tmpl;
5425         unsigned long end;
5426         unsigned long ptr;
5427         int ret;
5428         int type;
5429         u32 item_size = btrfs_item_size_nr(eb, slot);
5430         u64 refs = 0;
5431         u64 offset;
5432         u64 num_bytes;
5433         int metadata = 0;
5434
5435         btrfs_item_key_to_cpu(eb, &key, slot);
5436
5437         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5438                 metadata = 1;
5439                 num_bytes = root->nodesize;
5440         } else {
5441                 num_bytes = key.offset;
5442         }
5443
5444         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5445                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5446                       key.objectid, root->sectorsize);
5447                 return -EIO;
5448         }
5449         if (item_size < sizeof(*ei)) {
5450 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5451                 struct btrfs_extent_item_v0 *ei0;
5452                 BUG_ON(item_size != sizeof(*ei0));
5453                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5454                 refs = btrfs_extent_refs_v0(eb, ei0);
5455 #else
5456                 BUG();
5457 #endif
5458                 memset(&tmpl, 0, sizeof(tmpl));
5459                 tmpl.start = key.objectid;
5460                 tmpl.nr = num_bytes;
5461                 tmpl.extent_item_refs = refs;
5462                 tmpl.metadata = metadata;
5463                 tmpl.found_rec = 1;
5464                 tmpl.max_size = num_bytes;
5465
5466                 return add_extent_rec(extent_cache, &tmpl);
5467         }
5468
5469         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5470         refs = btrfs_extent_refs(eb, ei);
5471         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5472                 metadata = 1;
5473         else
5474                 metadata = 0;
5475         if (metadata && num_bytes != root->nodesize) {
5476                 error("ignore invalid metadata extent, length %llu does not equal to %u",
5477                       num_bytes, root->nodesize);
5478                 return -EIO;
5479         }
5480         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5481                 error("ignore invalid data extent, length %llu is not aligned to %u",
5482                       num_bytes, root->sectorsize);
5483                 return -EIO;
5484         }
5485
5486         memset(&tmpl, 0, sizeof(tmpl));
5487         tmpl.start = key.objectid;
5488         tmpl.nr = num_bytes;
5489         tmpl.extent_item_refs = refs;
5490         tmpl.metadata = metadata;
5491         tmpl.found_rec = 1;
5492         tmpl.max_size = num_bytes;
5493         add_extent_rec(extent_cache, &tmpl);
5494
5495         ptr = (unsigned long)(ei + 1);
5496         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5497             key.type == BTRFS_EXTENT_ITEM_KEY)
5498                 ptr += sizeof(struct btrfs_tree_block_info);
5499
5500         end = (unsigned long)ei + item_size;
5501         while (ptr < end) {
5502                 iref = (struct btrfs_extent_inline_ref *)ptr;
5503                 type = btrfs_extent_inline_ref_type(eb, iref);
5504                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5505                 switch (type) {
5506                 case BTRFS_TREE_BLOCK_REF_KEY:
5507                         ret = add_tree_backref(extent_cache, key.objectid,
5508                                         0, offset, 0);
5509                         if (ret < 0)
5510                                 error("add_tree_backref failed: %s",
5511                                       strerror(-ret));
5512                         break;
5513                 case BTRFS_SHARED_BLOCK_REF_KEY:
5514                         ret = add_tree_backref(extent_cache, key.objectid,
5515                                         offset, 0, 0);
5516                         if (ret < 0)
5517                                 error("add_tree_backref failed: %s",
5518                                       strerror(-ret));
5519                         break;
5520                 case BTRFS_EXTENT_DATA_REF_KEY:
5521                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5522                         add_data_backref(extent_cache, key.objectid, 0,
5523                                         btrfs_extent_data_ref_root(eb, dref),
5524                                         btrfs_extent_data_ref_objectid(eb,
5525                                                                        dref),
5526                                         btrfs_extent_data_ref_offset(eb, dref),
5527                                         btrfs_extent_data_ref_count(eb, dref),
5528                                         0, num_bytes);
5529                         break;
5530                 case BTRFS_SHARED_DATA_REF_KEY:
5531                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5532                         add_data_backref(extent_cache, key.objectid, offset,
5533                                         0, 0, 0,
5534                                         btrfs_shared_data_ref_count(eb, sref),
5535                                         0, num_bytes);
5536                         break;
5537                 default:
5538                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5539                                 key.objectid, key.type, num_bytes);
5540                         goto out;
5541                 }
5542                 ptr += btrfs_extent_inline_ref_size(type);
5543         }
5544         WARN_ON(ptr > end);
5545 out:
5546         return 0;
5547 }
5548
5549 static int check_cache_range(struct btrfs_root *root,
5550                              struct btrfs_block_group_cache *cache,
5551                              u64 offset, u64 bytes)
5552 {
5553         struct btrfs_free_space *entry;
5554         u64 *logical;
5555         u64 bytenr;
5556         int stripe_len;
5557         int i, nr, ret;
5558
5559         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5560                 bytenr = btrfs_sb_offset(i);
5561                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5562                                        cache->key.objectid, bytenr, 0,
5563                                        &logical, &nr, &stripe_len);
5564                 if (ret)
5565                         return ret;
5566
5567                 while (nr--) {
5568                         if (logical[nr] + stripe_len <= offset)
5569                                 continue;
5570                         if (offset + bytes <= logical[nr])
5571                                 continue;
5572                         if (logical[nr] == offset) {
5573                                 if (stripe_len >= bytes) {
5574                                         kfree(logical);
5575                                         return 0;
5576                                 }
5577                                 bytes -= stripe_len;
5578                                 offset += stripe_len;
5579                         } else if (logical[nr] < offset) {
5580                                 if (logical[nr] + stripe_len >=
5581                                     offset + bytes) {
5582                                         kfree(logical);
5583                                         return 0;
5584                                 }
5585                                 bytes = (offset + bytes) -
5586                                         (logical[nr] + stripe_len);
5587                                 offset = logical[nr] + stripe_len;
5588                         } else {
5589                                 /*
5590                                  * Could be tricky, the super may land in the
5591                                  * middle of the area we're checking.  First
5592                                  * check the easiest case, it's at the end.
5593                                  */
5594                                 if (logical[nr] + stripe_len >=
5595                                     bytes + offset) {
5596                                         bytes = logical[nr] - offset;
5597                                         continue;
5598                                 }
5599
5600                                 /* Check the left side */
5601                                 ret = check_cache_range(root, cache,
5602                                                         offset,
5603                                                         logical[nr] - offset);
5604                                 if (ret) {
5605                                         kfree(logical);
5606                                         return ret;
5607                                 }
5608
5609                                 /* Now we continue with the right side */
5610                                 bytes = (offset + bytes) -
5611                                         (logical[nr] + stripe_len);
5612                                 offset = logical[nr] + stripe_len;
5613                         }
5614                 }
5615
5616                 kfree(logical);
5617         }
5618
5619         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5620         if (!entry) {
5621                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5622                         offset, offset+bytes);
5623                 return -EINVAL;
5624         }
5625
5626         if (entry->offset != offset) {
5627                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5628                         entry->offset);
5629                 return -EINVAL;
5630         }
5631
5632         if (entry->bytes != bytes) {
5633                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5634                         bytes, entry->bytes, offset);
5635                 return -EINVAL;
5636         }
5637
5638         unlink_free_space(cache->free_space_ctl, entry);
5639         free(entry);
5640         return 0;
5641 }
5642
5643 static int verify_space_cache(struct btrfs_root *root,
5644                               struct btrfs_block_group_cache *cache)
5645 {
5646         struct btrfs_path *path;
5647         struct extent_buffer *leaf;
5648         struct btrfs_key key;
5649         u64 last;
5650         int ret = 0;
5651
5652         path = btrfs_alloc_path();
5653         if (!path)
5654                 return -ENOMEM;
5655
5656         root = root->fs_info->extent_root;
5657
5658         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5659
5660         key.objectid = last;
5661         key.offset = 0;
5662         key.type = BTRFS_EXTENT_ITEM_KEY;
5663
5664         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5665         if (ret < 0)
5666                 goto out;
5667         ret = 0;
5668         while (1) {
5669                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5670                         ret = btrfs_next_leaf(root, path);
5671                         if (ret < 0)
5672                                 goto out;
5673                         if (ret > 0) {
5674                                 ret = 0;
5675                                 break;
5676                         }
5677                 }
5678                 leaf = path->nodes[0];
5679                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5680                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5681                         break;
5682                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5683                     key.type != BTRFS_METADATA_ITEM_KEY) {
5684                         path->slots[0]++;
5685                         continue;
5686                 }
5687
5688                 if (last == key.objectid) {
5689                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5690                                 last = key.objectid + key.offset;
5691                         else
5692                                 last = key.objectid + root->nodesize;
5693                         path->slots[0]++;
5694                         continue;
5695                 }
5696
5697                 ret = check_cache_range(root, cache, last,
5698                                         key.objectid - last);
5699                 if (ret)
5700                         break;
5701                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5702                         last = key.objectid + key.offset;
5703                 else
5704                         last = key.objectid + root->nodesize;
5705                 path->slots[0]++;
5706         }
5707
5708         if (last < cache->key.objectid + cache->key.offset)
5709                 ret = check_cache_range(root, cache, last,
5710                                         cache->key.objectid +
5711                                         cache->key.offset - last);
5712
5713 out:
5714         btrfs_free_path(path);
5715
5716         if (!ret &&
5717             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5718                 fprintf(stderr, "There are still entries left in the space "
5719                         "cache\n");
5720                 ret = -EINVAL;
5721         }
5722
5723         return ret;
5724 }
5725
5726 static int check_space_cache(struct btrfs_root *root)
5727 {
5728         struct btrfs_block_group_cache *cache;
5729         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5730         int ret;
5731         int error = 0;
5732
5733         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5734             btrfs_super_generation(root->fs_info->super_copy) !=
5735             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5736                 printf("cache and super generation don't match, space cache "
5737                        "will be invalidated\n");
5738                 return 0;
5739         }
5740
5741         if (ctx.progress_enabled) {
5742                 ctx.tp = TASK_FREE_SPACE;
5743                 task_start(ctx.info);
5744         }
5745
5746         while (1) {
5747                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5748                 if (!cache)
5749                         break;
5750
5751                 start = cache->key.objectid + cache->key.offset;
5752                 if (!cache->free_space_ctl) {
5753                         if (btrfs_init_free_space_ctl(cache,
5754                                                       root->sectorsize)) {
5755                                 ret = -ENOMEM;
5756                                 break;
5757                         }
5758                 } else {
5759                         btrfs_remove_free_space_cache(cache);
5760                 }
5761
5762                 if (btrfs_fs_compat_ro(root->fs_info,
5763                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5764                         ret = exclude_super_stripes(root, cache);
5765                         if (ret) {
5766                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5767                                         strerror(-ret));
5768                                 error++;
5769                                 continue;
5770                         }
5771                         ret = load_free_space_tree(root->fs_info, cache);
5772                         free_excluded_extents(root, cache);
5773                         if (ret < 0) {
5774                                 fprintf(stderr, "could not load free space tree: %s\n",
5775                                         strerror(-ret));
5776                                 error++;
5777                                 continue;
5778                         }
5779                         error += ret;
5780                 } else {
5781                         ret = load_free_space_cache(root->fs_info, cache);
5782                         if (!ret)
5783                                 continue;
5784                 }
5785
5786                 ret = verify_space_cache(root, cache);
5787                 if (ret) {
5788                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5789                                 cache->key.objectid);
5790                         error++;
5791                 }
5792         }
5793
5794         task_stop(ctx.info);
5795
5796         return error ? -EINVAL : 0;
5797 }
5798
5799 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5800                         u64 num_bytes, unsigned long leaf_offset,
5801                         struct extent_buffer *eb) {
5802
5803         u64 offset = 0;
5804         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5805         char *data;
5806         unsigned long csum_offset;
5807         u32 csum;
5808         u32 csum_expected;
5809         u64 read_len;
5810         u64 data_checked = 0;
5811         u64 tmp;
5812         int ret = 0;
5813         int mirror;
5814         int num_copies;
5815
5816         if (num_bytes % root->sectorsize)
5817                 return -EINVAL;
5818
5819         data = malloc(num_bytes);
5820         if (!data)
5821                 return -ENOMEM;
5822
5823         while (offset < num_bytes) {
5824                 mirror = 0;
5825 again:
5826                 read_len = num_bytes - offset;
5827                 /* read as much space once a time */
5828                 ret = read_extent_data(root, data + offset,
5829                                 bytenr + offset, &read_len, mirror);
5830                 if (ret)
5831                         goto out;
5832                 data_checked = 0;
5833                 /* verify every 4k data's checksum */
5834                 while (data_checked < read_len) {
5835                         csum = ~(u32)0;
5836                         tmp = offset + data_checked;
5837
5838                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5839                                                csum, root->sectorsize);
5840                         btrfs_csum_final(csum, (char *)&csum);
5841
5842                         csum_offset = leaf_offset +
5843                                  tmp / root->sectorsize * csum_size;
5844                         read_extent_buffer(eb, (char *)&csum_expected,
5845                                            csum_offset, csum_size);
5846                         /* try another mirror */
5847                         if (csum != csum_expected) {
5848                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5849                                                 mirror, bytenr + tmp,
5850                                                 csum, csum_expected);
5851                                 num_copies = btrfs_num_copies(
5852                                                 &root->fs_info->mapping_tree,
5853                                                 bytenr, num_bytes);
5854                                 if (mirror < num_copies - 1) {
5855                                         mirror += 1;
5856                                         goto again;
5857                                 }
5858                         }
5859                         data_checked += root->sectorsize;
5860                 }
5861                 offset += read_len;
5862         }
5863 out:
5864         free(data);
5865         return ret;
5866 }
5867
5868 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5869                                u64 num_bytes)
5870 {
5871         struct btrfs_path *path;
5872         struct extent_buffer *leaf;
5873         struct btrfs_key key;
5874         int ret;
5875
5876         path = btrfs_alloc_path();
5877         if (!path) {
5878                 fprintf(stderr, "Error allocating path\n");
5879                 return -ENOMEM;
5880         }
5881
5882         key.objectid = bytenr;
5883         key.type = BTRFS_EXTENT_ITEM_KEY;
5884         key.offset = (u64)-1;
5885
5886 again:
5887         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5888                                 0, 0);
5889         if (ret < 0) {
5890                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5891                 btrfs_free_path(path);
5892                 return ret;
5893         } else if (ret) {
5894                 if (path->slots[0] > 0) {
5895                         path->slots[0]--;
5896                 } else {
5897                         ret = btrfs_prev_leaf(root, path);
5898                         if (ret < 0) {
5899                                 goto out;
5900                         } else if (ret > 0) {
5901                                 ret = 0;
5902                                 goto out;
5903                         }
5904                 }
5905         }
5906
5907         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5908
5909         /*
5910          * Block group items come before extent items if they have the same
5911          * bytenr, so walk back one more just in case.  Dear future traveller,
5912          * first congrats on mastering time travel.  Now if it's not too much
5913          * trouble could you go back to 2006 and tell Chris to make the
5914          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5915          * EXTENT_ITEM_KEY please?
5916          */
5917         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5918                 if (path->slots[0] > 0) {
5919                         path->slots[0]--;
5920                 } else {
5921                         ret = btrfs_prev_leaf(root, path);
5922                         if (ret < 0) {
5923                                 goto out;
5924                         } else if (ret > 0) {
5925                                 ret = 0;
5926                                 goto out;
5927                         }
5928                 }
5929                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5930         }
5931
5932         while (num_bytes) {
5933                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5934                         ret = btrfs_next_leaf(root, path);
5935                         if (ret < 0) {
5936                                 fprintf(stderr, "Error going to next leaf "
5937                                         "%d\n", ret);
5938                                 btrfs_free_path(path);
5939                                 return ret;
5940                         } else if (ret) {
5941                                 break;
5942                         }
5943                 }
5944                 leaf = path->nodes[0];
5945                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5946                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5947                         path->slots[0]++;
5948                         continue;
5949                 }
5950                 if (key.objectid + key.offset < bytenr) {
5951                         path->slots[0]++;
5952                         continue;
5953                 }
5954                 if (key.objectid > bytenr + num_bytes)
5955                         break;
5956
5957                 if (key.objectid == bytenr) {
5958                         if (key.offset >= num_bytes) {
5959                                 num_bytes = 0;
5960                                 break;
5961                         }
5962                         num_bytes -= key.offset;
5963                         bytenr += key.offset;
5964                 } else if (key.objectid < bytenr) {
5965                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5966                                 num_bytes = 0;
5967                                 break;
5968                         }
5969                         num_bytes = (bytenr + num_bytes) -
5970                                 (key.objectid + key.offset);
5971                         bytenr = key.objectid + key.offset;
5972                 } else {
5973                         if (key.objectid + key.offset < bytenr + num_bytes) {
5974                                 u64 new_start = key.objectid + key.offset;
5975                                 u64 new_bytes = bytenr + num_bytes - new_start;
5976
5977                                 /*
5978                                  * Weird case, the extent is in the middle of
5979                                  * our range, we'll have to search one side
5980                                  * and then the other.  Not sure if this happens
5981                                  * in real life, but no harm in coding it up
5982                                  * anyway just in case.
5983                                  */
5984                                 btrfs_release_path(path);
5985                                 ret = check_extent_exists(root, new_start,
5986                                                           new_bytes);
5987                                 if (ret) {
5988                                         fprintf(stderr, "Right section didn't "
5989                                                 "have a record\n");
5990                                         break;
5991                                 }
5992                                 num_bytes = key.objectid - bytenr;
5993                                 goto again;
5994                         }
5995                         num_bytes = key.objectid - bytenr;
5996                 }
5997                 path->slots[0]++;
5998         }
5999         ret = 0;
6000
6001 out:
6002         if (num_bytes && !ret) {
6003                 fprintf(stderr, "There are no extents for csum range "
6004                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
6005                 ret = 1;
6006         }
6007
6008         btrfs_free_path(path);
6009         return ret;
6010 }
6011
6012 static int check_csums(struct btrfs_root *root)
6013 {
6014         struct btrfs_path *path;
6015         struct extent_buffer *leaf;
6016         struct btrfs_key key;
6017         u64 offset = 0, num_bytes = 0;
6018         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6019         int errors = 0;
6020         int ret;
6021         u64 data_len;
6022         unsigned long leaf_offset;
6023
6024         root = root->fs_info->csum_root;
6025         if (!extent_buffer_uptodate(root->node)) {
6026                 fprintf(stderr, "No valid csum tree found\n");
6027                 return -ENOENT;
6028         }
6029
6030         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
6031         key.type = BTRFS_EXTENT_CSUM_KEY;
6032         key.offset = 0;
6033
6034         path = btrfs_alloc_path();
6035         if (!path)
6036                 return -ENOMEM;
6037
6038         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6039         if (ret < 0) {
6040                 fprintf(stderr, "Error searching csum tree %d\n", ret);
6041                 btrfs_free_path(path);
6042                 return ret;
6043         }
6044
6045         if (ret > 0 && path->slots[0])
6046                 path->slots[0]--;
6047         ret = 0;
6048
6049         while (1) {
6050                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6051                         ret = btrfs_next_leaf(root, path);
6052                         if (ret < 0) {
6053                                 fprintf(stderr, "Error going to next leaf "
6054                                         "%d\n", ret);
6055                                 break;
6056                         }
6057                         if (ret)
6058                                 break;
6059                 }
6060                 leaf = path->nodes[0];
6061
6062                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6063                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
6064                         path->slots[0]++;
6065                         continue;
6066                 }
6067
6068                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
6069                               csum_size) * root->sectorsize;
6070                 if (!check_data_csum)
6071                         goto skip_csum_check;
6072                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
6073                 ret = check_extent_csums(root, key.offset, data_len,
6074                                          leaf_offset, leaf);
6075                 if (ret)
6076                         break;
6077 skip_csum_check:
6078                 if (!num_bytes) {
6079                         offset = key.offset;
6080                 } else if (key.offset != offset + num_bytes) {
6081                         ret = check_extent_exists(root, offset, num_bytes);
6082                         if (ret) {
6083                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6084                                         "there is no extent record\n",
6085                                         offset, offset+num_bytes);
6086                                 errors++;
6087                         }
6088                         offset = key.offset;
6089                         num_bytes = 0;
6090                 }
6091                 num_bytes += data_len;
6092                 path->slots[0]++;
6093         }
6094
6095         btrfs_free_path(path);
6096         return errors;
6097 }
6098
6099 static int is_dropped_key(struct btrfs_key *key,
6100                           struct btrfs_key *drop_key) {
6101         if (key->objectid < drop_key->objectid)
6102                 return 1;
6103         else if (key->objectid == drop_key->objectid) {
6104                 if (key->type < drop_key->type)
6105                         return 1;
6106                 else if (key->type == drop_key->type) {
6107                         if (key->offset < drop_key->offset)
6108                                 return 1;
6109                 }
6110         }
6111         return 0;
6112 }
6113
6114 /*
6115  * Here are the rules for FULL_BACKREF.
6116  *
6117  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6118  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6119  *      FULL_BACKREF set.
6120  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6121  *    if it happened after the relocation occurred since we'll have dropped the
6122  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6123  *    have no real way to know for sure.
6124  *
6125  * We process the blocks one root at a time, and we start from the lowest root
6126  * objectid and go to the highest.  So we can just lookup the owner backref for
6127  * the record and if we don't find it then we know it doesn't exist and we have
6128  * a FULL BACKREF.
6129  *
6130  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6131  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6132  * be set or not and then we can check later once we've gathered all the refs.
6133  */
6134 static int calc_extent_flag(struct btrfs_root *root,
6135                            struct cache_tree *extent_cache,
6136                            struct extent_buffer *buf,
6137                            struct root_item_record *ri,
6138                            u64 *flags)
6139 {
6140         struct extent_record *rec;
6141         struct cache_extent *cache;
6142         struct tree_backref *tback;
6143         u64 owner = 0;
6144
6145         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6146         /* we have added this extent before */
6147         BUG_ON(!cache);
6148         rec = container_of(cache, struct extent_record, cache);
6149
6150         /*
6151          * Except file/reloc tree, we can not have
6152          * FULL BACKREF MODE
6153          */
6154         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6155                 goto normal;
6156         /*
6157          * root node
6158          */
6159         if (buf->start == ri->bytenr)
6160                 goto normal;
6161
6162         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6163                 goto full_backref;
6164
6165         owner = btrfs_header_owner(buf);
6166         if (owner == ri->objectid)
6167                 goto normal;
6168
6169         tback = find_tree_backref(rec, 0, owner);
6170         if (!tback)
6171                 goto full_backref;
6172 normal:
6173         *flags = 0;
6174         if (rec->flag_block_full_backref != FLAG_UNSET &&
6175             rec->flag_block_full_backref != 0)
6176                 rec->bad_full_backref = 1;
6177         return 0;
6178 full_backref:
6179         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6180         if (rec->flag_block_full_backref != FLAG_UNSET &&
6181             rec->flag_block_full_backref != 1)
6182                 rec->bad_full_backref = 1;
6183         return 0;
6184 }
6185
6186 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6187 {
6188         fprintf(stderr, "Invalid key type(");
6189         print_key_type(stderr, 0, key_type);
6190         fprintf(stderr, ") found in root(");
6191         print_objectid(stderr, rootid, 0);
6192         fprintf(stderr, ")\n");
6193 }
6194
6195 /*
6196  * Check if the key is valid with its extent buffer.
6197  *
6198  * This is a early check in case invalid key exists in a extent buffer
6199  * This is not comprehensive yet, but should prevent wrong key/item passed
6200  * further
6201  */
6202 static int check_type_with_root(u64 rootid, u8 key_type)
6203 {
6204         switch (key_type) {
6205         /* Only valid in chunk tree */
6206         case BTRFS_DEV_ITEM_KEY:
6207         case BTRFS_CHUNK_ITEM_KEY:
6208                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6209                         goto err;
6210                 break;
6211         /* valid in csum and log tree */
6212         case BTRFS_CSUM_TREE_OBJECTID:
6213                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6214                       is_fstree(rootid)))
6215                         goto err;
6216                 break;
6217         case BTRFS_EXTENT_ITEM_KEY:
6218         case BTRFS_METADATA_ITEM_KEY:
6219         case BTRFS_BLOCK_GROUP_ITEM_KEY:
6220                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6221                         goto err;
6222                 break;
6223         case BTRFS_ROOT_ITEM_KEY:
6224                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6225                         goto err;
6226                 break;
6227         case BTRFS_DEV_EXTENT_KEY:
6228                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6229                         goto err;
6230                 break;
6231         }
6232         return 0;
6233 err:
6234         report_mismatch_key_root(key_type, rootid);
6235         return -EINVAL;
6236 }
6237
6238 static int run_next_block(struct btrfs_root *root,
6239                           struct block_info *bits,
6240                           int bits_nr,
6241                           u64 *last,
6242                           struct cache_tree *pending,
6243                           struct cache_tree *seen,
6244                           struct cache_tree *reada,
6245                           struct cache_tree *nodes,
6246                           struct cache_tree *extent_cache,
6247                           struct cache_tree *chunk_cache,
6248                           struct rb_root *dev_cache,
6249                           struct block_group_tree *block_group_cache,
6250                           struct device_extent_tree *dev_extent_cache,
6251                           struct root_item_record *ri)
6252 {
6253         struct extent_buffer *buf;
6254         struct extent_record *rec = NULL;
6255         u64 bytenr;
6256         u32 size;
6257         u64 parent;
6258         u64 owner;
6259         u64 flags;
6260         u64 ptr;
6261         u64 gen = 0;
6262         int ret = 0;
6263         int i;
6264         int nritems;
6265         struct btrfs_key key;
6266         struct cache_extent *cache;
6267         int reada_bits;
6268
6269         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6270                                     bits_nr, &reada_bits);
6271         if (nritems == 0)
6272                 return 1;
6273
6274         if (!reada_bits) {
6275                 for(i = 0; i < nritems; i++) {
6276                         ret = add_cache_extent(reada, bits[i].start,
6277                                                bits[i].size);
6278                         if (ret == -EEXIST)
6279                                 continue;
6280
6281                         /* fixme, get the parent transid */
6282                         readahead_tree_block(root, bits[i].start,
6283                                              bits[i].size, 0);
6284                 }
6285         }
6286         *last = bits[0].start;
6287         bytenr = bits[0].start;
6288         size = bits[0].size;
6289
6290         cache = lookup_cache_extent(pending, bytenr, size);
6291         if (cache) {
6292                 remove_cache_extent(pending, cache);
6293                 free(cache);
6294         }
6295         cache = lookup_cache_extent(reada, bytenr, size);
6296         if (cache) {
6297                 remove_cache_extent(reada, cache);
6298                 free(cache);
6299         }
6300         cache = lookup_cache_extent(nodes, bytenr, size);
6301         if (cache) {
6302                 remove_cache_extent(nodes, cache);
6303                 free(cache);
6304         }
6305         cache = lookup_cache_extent(extent_cache, bytenr, size);
6306         if (cache) {
6307                 rec = container_of(cache, struct extent_record, cache);
6308                 gen = rec->parent_generation;
6309         }
6310
6311         /* fixme, get the real parent transid */
6312         buf = read_tree_block(root, bytenr, size, gen);
6313         if (!extent_buffer_uptodate(buf)) {
6314                 record_bad_block_io(root->fs_info,
6315                                     extent_cache, bytenr, size);
6316                 goto out;
6317         }
6318
6319         nritems = btrfs_header_nritems(buf);
6320
6321         flags = 0;
6322         if (!init_extent_tree) {
6323                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6324                                        btrfs_header_level(buf), 1, NULL,
6325                                        &flags);
6326                 if (ret < 0) {
6327                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6328                         if (ret < 0) {
6329                                 fprintf(stderr, "Couldn't calc extent flags\n");
6330                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6331                         }
6332                 }
6333         } else {
6334                 flags = 0;
6335                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6336                 if (ret < 0) {
6337                         fprintf(stderr, "Couldn't calc extent flags\n");
6338                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6339                 }
6340         }
6341
6342         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6343                 if (ri != NULL &&
6344                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6345                     ri->objectid == btrfs_header_owner(buf)) {
6346                         /*
6347                          * Ok we got to this block from it's original owner and
6348                          * we have FULL_BACKREF set.  Relocation can leave
6349                          * converted blocks over so this is altogether possible,
6350                          * however it's not possible if the generation > the
6351                          * last snapshot, so check for this case.
6352                          */
6353                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6354                             btrfs_header_generation(buf) > ri->last_snapshot) {
6355                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6356                                 rec->bad_full_backref = 1;
6357                         }
6358                 }
6359         } else {
6360                 if (ri != NULL &&
6361                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6362                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6363                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6364                         rec->bad_full_backref = 1;
6365                 }
6366         }
6367
6368         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6369                 rec->flag_block_full_backref = 1;
6370                 parent = bytenr;
6371                 owner = 0;
6372         } else {
6373                 rec->flag_block_full_backref = 0;
6374                 parent = 0;
6375                 owner = btrfs_header_owner(buf);
6376         }
6377
6378         ret = check_block(root, extent_cache, buf, flags);
6379         if (ret)
6380                 goto out;
6381
6382         if (btrfs_is_leaf(buf)) {
6383                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6384                 for (i = 0; i < nritems; i++) {
6385                         struct btrfs_file_extent_item *fi;
6386                         btrfs_item_key_to_cpu(buf, &key, i);
6387                         /*
6388                          * Check key type against the leaf owner.
6389                          * Could filter quite a lot of early error if
6390                          * owner is correct
6391                          */
6392                         if (check_type_with_root(btrfs_header_owner(buf),
6393                                                  key.type)) {
6394                                 fprintf(stderr, "ignoring invalid key\n");
6395                                 continue;
6396                         }
6397                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6398                                 process_extent_item(root, extent_cache, buf,
6399                                                     i);
6400                                 continue;
6401                         }
6402                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6403                                 process_extent_item(root, extent_cache, buf,
6404                                                     i);
6405                                 continue;
6406                         }
6407                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6408                                 total_csum_bytes +=
6409                                         btrfs_item_size_nr(buf, i);
6410                                 continue;
6411                         }
6412                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6413                                 process_chunk_item(chunk_cache, &key, buf, i);
6414                                 continue;
6415                         }
6416                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6417                                 process_device_item(dev_cache, &key, buf, i);
6418                                 continue;
6419                         }
6420                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6421                                 process_block_group_item(block_group_cache,
6422                                         &key, buf, i);
6423                                 continue;
6424                         }
6425                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6426                                 process_device_extent_item(dev_extent_cache,
6427                                         &key, buf, i);
6428                                 continue;
6429
6430                         }
6431                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6432 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6433                                 process_extent_ref_v0(extent_cache, buf, i);
6434 #else
6435                                 BUG();
6436 #endif
6437                                 continue;
6438                         }
6439
6440                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6441                                 ret = add_tree_backref(extent_cache,
6442                                                 key.objectid, 0, key.offset, 0);
6443                                 if (ret < 0)
6444                                         error("add_tree_backref failed: %s",
6445                                               strerror(-ret));
6446                                 continue;
6447                         }
6448                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6449                                 ret = add_tree_backref(extent_cache,
6450                                                 key.objectid, key.offset, 0, 0);
6451                                 if (ret < 0)
6452                                         error("add_tree_backref failed: %s",
6453                                               strerror(-ret));
6454                                 continue;
6455                         }
6456                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6457                                 struct btrfs_extent_data_ref *ref;
6458                                 ref = btrfs_item_ptr(buf, i,
6459                                                 struct btrfs_extent_data_ref);
6460                                 add_data_backref(extent_cache,
6461                                         key.objectid, 0,
6462                                         btrfs_extent_data_ref_root(buf, ref),
6463                                         btrfs_extent_data_ref_objectid(buf,
6464                                                                        ref),
6465                                         btrfs_extent_data_ref_offset(buf, ref),
6466                                         btrfs_extent_data_ref_count(buf, ref),
6467                                         0, root->sectorsize);
6468                                 continue;
6469                         }
6470                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6471                                 struct btrfs_shared_data_ref *ref;
6472                                 ref = btrfs_item_ptr(buf, i,
6473                                                 struct btrfs_shared_data_ref);
6474                                 add_data_backref(extent_cache,
6475                                         key.objectid, key.offset, 0, 0, 0,
6476                                         btrfs_shared_data_ref_count(buf, ref),
6477                                         0, root->sectorsize);
6478                                 continue;
6479                         }
6480                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6481                                 struct bad_item *bad;
6482
6483                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6484                                         continue;
6485                                 if (!owner)
6486                                         continue;
6487                                 bad = malloc(sizeof(struct bad_item));
6488                                 if (!bad)
6489                                         continue;
6490                                 INIT_LIST_HEAD(&bad->list);
6491                                 memcpy(&bad->key, &key,
6492                                        sizeof(struct btrfs_key));
6493                                 bad->root_id = owner;
6494                                 list_add_tail(&bad->list, &delete_items);
6495                                 continue;
6496                         }
6497                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6498                                 continue;
6499                         fi = btrfs_item_ptr(buf, i,
6500                                             struct btrfs_file_extent_item);
6501                         if (btrfs_file_extent_type(buf, fi) ==
6502                             BTRFS_FILE_EXTENT_INLINE)
6503                                 continue;
6504                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6505                                 continue;
6506
6507                         data_bytes_allocated +=
6508                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6509                         if (data_bytes_allocated < root->sectorsize) {
6510                                 abort();
6511                         }
6512                         data_bytes_referenced +=
6513                                 btrfs_file_extent_num_bytes(buf, fi);
6514                         add_data_backref(extent_cache,
6515                                 btrfs_file_extent_disk_bytenr(buf, fi),
6516                                 parent, owner, key.objectid, key.offset -
6517                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6518                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6519                 }
6520         } else {
6521                 int level;
6522                 struct btrfs_key first_key;
6523
6524                 first_key.objectid = 0;
6525
6526                 if (nritems > 0)
6527                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6528                 level = btrfs_header_level(buf);
6529                 for (i = 0; i < nritems; i++) {
6530                         struct extent_record tmpl;
6531
6532                         ptr = btrfs_node_blockptr(buf, i);
6533                         size = root->nodesize;
6534                         btrfs_node_key_to_cpu(buf, &key, i);
6535                         if (ri != NULL) {
6536                                 if ((level == ri->drop_level)
6537                                     && is_dropped_key(&key, &ri->drop_key)) {
6538                                         continue;
6539                                 }
6540                         }
6541
6542                         memset(&tmpl, 0, sizeof(tmpl));
6543                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6544                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6545                         tmpl.start = ptr;
6546                         tmpl.nr = size;
6547                         tmpl.refs = 1;
6548                         tmpl.metadata = 1;
6549                         tmpl.max_size = size;
6550                         ret = add_extent_rec(extent_cache, &tmpl);
6551                         if (ret < 0)
6552                                 goto out;
6553
6554                         ret = add_tree_backref(extent_cache, ptr, parent,
6555                                         owner, 1);
6556                         if (ret < 0) {
6557                                 error("add_tree_backref failed: %s",
6558                                       strerror(-ret));
6559                                 continue;
6560                         }
6561
6562                         if (level > 1) {
6563                                 add_pending(nodes, seen, ptr, size);
6564                         } else {
6565                                 add_pending(pending, seen, ptr, size);
6566                         }
6567                 }
6568                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6569                                       nritems) * sizeof(struct btrfs_key_ptr);
6570         }
6571         total_btree_bytes += buf->len;
6572         if (fs_root_objectid(btrfs_header_owner(buf)))
6573                 total_fs_tree_bytes += buf->len;
6574         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6575                 total_extent_tree_bytes += buf->len;
6576         if (!found_old_backref &&
6577             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6578             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6579             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6580                 found_old_backref = 1;
6581 out:
6582         free_extent_buffer(buf);
6583         return ret;
6584 }
6585
6586 static int add_root_to_pending(struct extent_buffer *buf,
6587                                struct cache_tree *extent_cache,
6588                                struct cache_tree *pending,
6589                                struct cache_tree *seen,
6590                                struct cache_tree *nodes,
6591                                u64 objectid)
6592 {
6593         struct extent_record tmpl;
6594         int ret;
6595
6596         if (btrfs_header_level(buf) > 0)
6597                 add_pending(nodes, seen, buf->start, buf->len);
6598         else
6599                 add_pending(pending, seen, buf->start, buf->len);
6600
6601         memset(&tmpl, 0, sizeof(tmpl));
6602         tmpl.start = buf->start;
6603         tmpl.nr = buf->len;
6604         tmpl.is_root = 1;
6605         tmpl.refs = 1;
6606         tmpl.metadata = 1;
6607         tmpl.max_size = buf->len;
6608         add_extent_rec(extent_cache, &tmpl);
6609
6610         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6611             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6612                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6613                                 0, 1);
6614         else
6615                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6616                                 1);
6617         return ret;
6618 }
6619
6620 /* as we fix the tree, we might be deleting blocks that
6621  * we're tracking for repair.  This hook makes sure we
6622  * remove any backrefs for blocks as we are fixing them.
6623  */
6624 static int free_extent_hook(struct btrfs_trans_handle *trans,
6625                             struct btrfs_root *root,
6626                             u64 bytenr, u64 num_bytes, u64 parent,
6627                             u64 root_objectid, u64 owner, u64 offset,
6628                             int refs_to_drop)
6629 {
6630         struct extent_record *rec;
6631         struct cache_extent *cache;
6632         int is_data;
6633         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6634
6635         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6636         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6637         if (!cache)
6638                 return 0;
6639
6640         rec = container_of(cache, struct extent_record, cache);
6641         if (is_data) {
6642                 struct data_backref *back;
6643                 back = find_data_backref(rec, parent, root_objectid, owner,
6644                                          offset, 1, bytenr, num_bytes);
6645                 if (!back)
6646                         goto out;
6647                 if (back->node.found_ref) {
6648                         back->found_ref -= refs_to_drop;
6649                         if (rec->refs)
6650                                 rec->refs -= refs_to_drop;
6651                 }
6652                 if (back->node.found_extent_tree) {
6653                         back->num_refs -= refs_to_drop;
6654                         if (rec->extent_item_refs)
6655                                 rec->extent_item_refs -= refs_to_drop;
6656                 }
6657                 if (back->found_ref == 0)
6658                         back->node.found_ref = 0;
6659                 if (back->num_refs == 0)
6660                         back->node.found_extent_tree = 0;
6661
6662                 if (!back->node.found_extent_tree && back->node.found_ref) {
6663                         list_del(&back->node.list);
6664                         free(back);
6665                 }
6666         } else {
6667                 struct tree_backref *back;
6668                 back = find_tree_backref(rec, parent, root_objectid);
6669                 if (!back)
6670                         goto out;
6671                 if (back->node.found_ref) {
6672                         if (rec->refs)
6673                                 rec->refs--;
6674                         back->node.found_ref = 0;
6675                 }
6676                 if (back->node.found_extent_tree) {
6677                         if (rec->extent_item_refs)
6678                                 rec->extent_item_refs--;
6679                         back->node.found_extent_tree = 0;
6680                 }
6681                 if (!back->node.found_extent_tree && back->node.found_ref) {
6682                         list_del(&back->node.list);
6683                         free(back);
6684                 }
6685         }
6686         maybe_free_extent_rec(extent_cache, rec);
6687 out:
6688         return 0;
6689 }
6690
6691 static int delete_extent_records(struct btrfs_trans_handle *trans,
6692                                  struct btrfs_root *root,
6693                                  struct btrfs_path *path,
6694                                  u64 bytenr, u64 new_len)
6695 {
6696         struct btrfs_key key;
6697         struct btrfs_key found_key;
6698         struct extent_buffer *leaf;
6699         int ret;
6700         int slot;
6701
6702
6703         key.objectid = bytenr;
6704         key.type = (u8)-1;
6705         key.offset = (u64)-1;
6706
6707         while(1) {
6708                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6709                                         &key, path, 0, 1);
6710                 if (ret < 0)
6711                         break;
6712
6713                 if (ret > 0) {
6714                         ret = 0;
6715                         if (path->slots[0] == 0)
6716                                 break;
6717                         path->slots[0]--;
6718                 }
6719                 ret = 0;
6720
6721                 leaf = path->nodes[0];
6722                 slot = path->slots[0];
6723
6724                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6725                 if (found_key.objectid != bytenr)
6726                         break;
6727
6728                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6729                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6730                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6731                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6732                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6733                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6734                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6735                         btrfs_release_path(path);
6736                         if (found_key.type == 0) {
6737                                 if (found_key.offset == 0)
6738                                         break;
6739                                 key.offset = found_key.offset - 1;
6740                                 key.type = found_key.type;
6741                         }
6742                         key.type = found_key.type - 1;
6743                         key.offset = (u64)-1;
6744                         continue;
6745                 }
6746
6747                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6748                         found_key.objectid, found_key.type, found_key.offset);
6749
6750                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6751                 if (ret)
6752                         break;
6753                 btrfs_release_path(path);
6754
6755                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6756                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6757                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6758                                 found_key.offset : root->nodesize;
6759
6760                         ret = btrfs_update_block_group(trans, root, bytenr,
6761                                                        bytes, 0, 0);
6762                         if (ret)
6763                                 break;
6764                 }
6765         }
6766
6767         btrfs_release_path(path);
6768         return ret;
6769 }
6770
6771 /*
6772  * for a single backref, this will allocate a new extent
6773  * and add the backref to it.
6774  */
6775 static int record_extent(struct btrfs_trans_handle *trans,
6776                          struct btrfs_fs_info *info,
6777                          struct btrfs_path *path,
6778                          struct extent_record *rec,
6779                          struct extent_backref *back,
6780                          int allocated, u64 flags)
6781 {
6782         int ret;
6783         struct btrfs_root *extent_root = info->extent_root;
6784         struct extent_buffer *leaf;
6785         struct btrfs_key ins_key;
6786         struct btrfs_extent_item *ei;
6787         struct tree_backref *tback;
6788         struct data_backref *dback;
6789         struct btrfs_tree_block_info *bi;
6790
6791         if (!back->is_data)
6792                 rec->max_size = max_t(u64, rec->max_size,
6793                                     info->extent_root->nodesize);
6794
6795         if (!allocated) {
6796                 u32 item_size = sizeof(*ei);
6797
6798                 if (!back->is_data)
6799                         item_size += sizeof(*bi);
6800
6801                 ins_key.objectid = rec->start;
6802                 ins_key.offset = rec->max_size;
6803                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6804
6805                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6806                                         &ins_key, item_size);
6807                 if (ret)
6808                         goto fail;
6809
6810                 leaf = path->nodes[0];
6811                 ei = btrfs_item_ptr(leaf, path->slots[0],
6812                                     struct btrfs_extent_item);
6813
6814                 btrfs_set_extent_refs(leaf, ei, 0);
6815                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6816
6817                 if (back->is_data) {
6818                         btrfs_set_extent_flags(leaf, ei,
6819                                                BTRFS_EXTENT_FLAG_DATA);
6820                 } else {
6821                         struct btrfs_disk_key copy_key;;
6822
6823                         tback = to_tree_backref(back);
6824                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6825                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6826                                              sizeof(*bi));
6827
6828                         btrfs_set_disk_key_objectid(&copy_key,
6829                                                     rec->info_objectid);
6830                         btrfs_set_disk_key_type(&copy_key, 0);
6831                         btrfs_set_disk_key_offset(&copy_key, 0);
6832
6833                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6834                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6835
6836                         btrfs_set_extent_flags(leaf, ei,
6837                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6838                 }
6839
6840                 btrfs_mark_buffer_dirty(leaf);
6841                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6842                                                rec->max_size, 1, 0);
6843                 if (ret)
6844                         goto fail;
6845                 btrfs_release_path(path);
6846         }
6847
6848         if (back->is_data) {
6849                 u64 parent;
6850                 int i;
6851
6852                 dback = to_data_backref(back);
6853                 if (back->full_backref)
6854                         parent = dback->parent;
6855                 else
6856                         parent = 0;
6857
6858                 for (i = 0; i < dback->found_ref; i++) {
6859                         /* if parent != 0, we're doing a full backref
6860                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6861                          * just makes the backref allocator create a data
6862                          * backref
6863                          */
6864                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6865                                                    rec->start, rec->max_size,
6866                                                    parent,
6867                                                    dback->root,
6868                                                    parent ?
6869                                                    BTRFS_FIRST_FREE_OBJECTID :
6870                                                    dback->owner,
6871                                                    dback->offset);
6872                         if (ret)
6873                                 break;
6874                 }
6875                 fprintf(stderr, "adding new data backref"
6876                                 " on %llu %s %llu owner %llu"
6877                                 " offset %llu found %d\n",
6878                                 (unsigned long long)rec->start,
6879                                 back->full_backref ?
6880                                 "parent" : "root",
6881                                 back->full_backref ?
6882                                 (unsigned long long)parent :
6883                                 (unsigned long long)dback->root,
6884                                 (unsigned long long)dback->owner,
6885                                 (unsigned long long)dback->offset,
6886                                 dback->found_ref);
6887         } else {
6888                 u64 parent;
6889
6890                 tback = to_tree_backref(back);
6891                 if (back->full_backref)
6892                         parent = tback->parent;
6893                 else
6894                         parent = 0;
6895
6896                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6897                                            rec->start, rec->max_size,
6898                                            parent, tback->root, 0, 0);
6899                 fprintf(stderr, "adding new tree backref on "
6900                         "start %llu len %llu parent %llu root %llu\n",
6901                         rec->start, rec->max_size, parent, tback->root);
6902         }
6903 fail:
6904         btrfs_release_path(path);
6905         return ret;
6906 }
6907
6908 static struct extent_entry *find_entry(struct list_head *entries,
6909                                        u64 bytenr, u64 bytes)
6910 {
6911         struct extent_entry *entry = NULL;
6912
6913         list_for_each_entry(entry, entries, list) {
6914                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6915                         return entry;
6916         }
6917
6918         return NULL;
6919 }
6920
6921 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6922 {
6923         struct extent_entry *entry, *best = NULL, *prev = NULL;
6924
6925         list_for_each_entry(entry, entries, list) {
6926                 if (!prev) {
6927                         prev = entry;
6928                         continue;
6929                 }
6930
6931                 /*
6932                  * If there are as many broken entries as entries then we know
6933                  * not to trust this particular entry.
6934                  */
6935                 if (entry->broken == entry->count)
6936                         continue;
6937
6938                 /*
6939                  * If our current entry == best then we can't be sure our best
6940                  * is really the best, so we need to keep searching.
6941                  */
6942                 if (best && best->count == entry->count) {
6943                         prev = entry;
6944                         best = NULL;
6945                         continue;
6946                 }
6947
6948                 /* Prev == entry, not good enough, have to keep searching */
6949                 if (!prev->broken && prev->count == entry->count)
6950                         continue;
6951
6952                 if (!best)
6953                         best = (prev->count > entry->count) ? prev : entry;
6954                 else if (best->count < entry->count)
6955                         best = entry;
6956                 prev = entry;
6957         }
6958
6959         return best;
6960 }
6961
6962 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6963                       struct data_backref *dback, struct extent_entry *entry)
6964 {
6965         struct btrfs_trans_handle *trans;
6966         struct btrfs_root *root;
6967         struct btrfs_file_extent_item *fi;
6968         struct extent_buffer *leaf;
6969         struct btrfs_key key;
6970         u64 bytenr, bytes;
6971         int ret, err;
6972
6973         key.objectid = dback->root;
6974         key.type = BTRFS_ROOT_ITEM_KEY;
6975         key.offset = (u64)-1;
6976         root = btrfs_read_fs_root(info, &key);
6977         if (IS_ERR(root)) {
6978                 fprintf(stderr, "Couldn't find root for our ref\n");
6979                 return -EINVAL;
6980         }
6981
6982         /*
6983          * The backref points to the original offset of the extent if it was
6984          * split, so we need to search down to the offset we have and then walk
6985          * forward until we find the backref we're looking for.
6986          */
6987         key.objectid = dback->owner;
6988         key.type = BTRFS_EXTENT_DATA_KEY;
6989         key.offset = dback->offset;
6990         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6991         if (ret < 0) {
6992                 fprintf(stderr, "Error looking up ref %d\n", ret);
6993                 return ret;
6994         }
6995
6996         while (1) {
6997                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6998                         ret = btrfs_next_leaf(root, path);
6999                         if (ret) {
7000                                 fprintf(stderr, "Couldn't find our ref, next\n");
7001                                 return -EINVAL;
7002                         }
7003                 }
7004                 leaf = path->nodes[0];
7005                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7006                 if (key.objectid != dback->owner ||
7007                     key.type != BTRFS_EXTENT_DATA_KEY) {
7008                         fprintf(stderr, "Couldn't find our ref, search\n");
7009                         return -EINVAL;
7010                 }
7011                 fi = btrfs_item_ptr(leaf, path->slots[0],
7012                                     struct btrfs_file_extent_item);
7013                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7014                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
7015
7016                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
7017                         break;
7018                 path->slots[0]++;
7019         }
7020
7021         btrfs_release_path(path);
7022
7023         trans = btrfs_start_transaction(root, 1);
7024         if (IS_ERR(trans))
7025                 return PTR_ERR(trans);
7026
7027         /*
7028          * Ok we have the key of the file extent we want to fix, now we can cow
7029          * down to the thing and fix it.
7030          */
7031         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7032         if (ret < 0) {
7033                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
7034                         key.objectid, key.type, key.offset, ret);
7035                 goto out;
7036         }
7037         if (ret > 0) {
7038                 fprintf(stderr, "Well that's odd, we just found this key "
7039                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
7040                         key.offset);
7041                 ret = -EINVAL;
7042                 goto out;
7043         }
7044         leaf = path->nodes[0];
7045         fi = btrfs_item_ptr(leaf, path->slots[0],
7046                             struct btrfs_file_extent_item);
7047
7048         if (btrfs_file_extent_compression(leaf, fi) &&
7049             dback->disk_bytenr != entry->bytenr) {
7050                 fprintf(stderr, "Ref doesn't match the record start and is "
7051                         "compressed, please take a btrfs-image of this file "
7052                         "system and send it to a btrfs developer so they can "
7053                         "complete this functionality for bytenr %Lu\n",
7054                         dback->disk_bytenr);
7055                 ret = -EINVAL;
7056                 goto out;
7057         }
7058
7059         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
7060                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7061         } else if (dback->disk_bytenr > entry->bytenr) {
7062                 u64 off_diff, offset;
7063
7064                 off_diff = dback->disk_bytenr - entry->bytenr;
7065                 offset = btrfs_file_extent_offset(leaf, fi);
7066                 if (dback->disk_bytenr + offset +
7067                     btrfs_file_extent_num_bytes(leaf, fi) >
7068                     entry->bytenr + entry->bytes) {
7069                         fprintf(stderr, "Ref is past the entry end, please "
7070                                 "take a btrfs-image of this file system and "
7071                                 "send it to a btrfs developer, ref %Lu\n",
7072                                 dback->disk_bytenr);
7073                         ret = -EINVAL;
7074                         goto out;
7075                 }
7076                 offset += off_diff;
7077                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7078                 btrfs_set_file_extent_offset(leaf, fi, offset);
7079         } else if (dback->disk_bytenr < entry->bytenr) {
7080                 u64 offset;
7081
7082                 offset = btrfs_file_extent_offset(leaf, fi);
7083                 if (dback->disk_bytenr + offset < entry->bytenr) {
7084                         fprintf(stderr, "Ref is before the entry start, please"
7085                                 " take a btrfs-image of this file system and "
7086                                 "send it to a btrfs developer, ref %Lu\n",
7087                                 dback->disk_bytenr);
7088                         ret = -EINVAL;
7089                         goto out;
7090                 }
7091
7092                 offset += dback->disk_bytenr;
7093                 offset -= entry->bytenr;
7094                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7095                 btrfs_set_file_extent_offset(leaf, fi, offset);
7096         }
7097
7098         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7099
7100         /*
7101          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7102          * only do this if we aren't using compression, otherwise it's a
7103          * trickier case.
7104          */
7105         if (!btrfs_file_extent_compression(leaf, fi))
7106                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7107         else
7108                 printf("ram bytes may be wrong?\n");
7109         btrfs_mark_buffer_dirty(leaf);
7110 out:
7111         err = btrfs_commit_transaction(trans, root);
7112         btrfs_release_path(path);
7113         return ret ? ret : err;
7114 }
7115
7116 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7117                            struct extent_record *rec)
7118 {
7119         struct extent_backref *back;
7120         struct data_backref *dback;
7121         struct extent_entry *entry, *best = NULL;
7122         LIST_HEAD(entries);
7123         int nr_entries = 0;
7124         int broken_entries = 0;
7125         int ret = 0;
7126         short mismatch = 0;
7127
7128         /*
7129          * Metadata is easy and the backrefs should always agree on bytenr and
7130          * size, if not we've got bigger issues.
7131          */
7132         if (rec->metadata)
7133                 return 0;
7134
7135         list_for_each_entry(back, &rec->backrefs, list) {
7136                 if (back->full_backref || !back->is_data)
7137                         continue;
7138
7139                 dback = to_data_backref(back);
7140
7141                 /*
7142                  * We only pay attention to backrefs that we found a real
7143                  * backref for.
7144                  */
7145                 if (dback->found_ref == 0)
7146                         continue;
7147
7148                 /*
7149                  * For now we only catch when the bytes don't match, not the
7150                  * bytenr.  We can easily do this at the same time, but I want
7151                  * to have a fs image to test on before we just add repair
7152                  * functionality willy-nilly so we know we won't screw up the
7153                  * repair.
7154                  */
7155
7156                 entry = find_entry(&entries, dback->disk_bytenr,
7157                                    dback->bytes);
7158                 if (!entry) {
7159                         entry = malloc(sizeof(struct extent_entry));
7160                         if (!entry) {
7161                                 ret = -ENOMEM;
7162                                 goto out;
7163                         }
7164                         memset(entry, 0, sizeof(*entry));
7165                         entry->bytenr = dback->disk_bytenr;
7166                         entry->bytes = dback->bytes;
7167                         list_add_tail(&entry->list, &entries);
7168                         nr_entries++;
7169                 }
7170
7171                 /*
7172                  * If we only have on entry we may think the entries agree when
7173                  * in reality they don't so we have to do some extra checking.
7174                  */
7175                 if (dback->disk_bytenr != rec->start ||
7176                     dback->bytes != rec->nr || back->broken)
7177                         mismatch = 1;
7178
7179                 if (back->broken) {
7180                         entry->broken++;
7181                         broken_entries++;
7182                 }
7183
7184                 entry->count++;
7185         }
7186
7187         /* Yay all the backrefs agree, carry on good sir */
7188         if (nr_entries <= 1 && !mismatch)
7189                 goto out;
7190
7191         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7192                 "%Lu\n", rec->start);
7193
7194         /*
7195          * First we want to see if the backrefs can agree amongst themselves who
7196          * is right, so figure out which one of the entries has the highest
7197          * count.
7198          */
7199         best = find_most_right_entry(&entries);
7200
7201         /*
7202          * Ok so we may have an even split between what the backrefs think, so
7203          * this is where we use the extent ref to see what it thinks.
7204          */
7205         if (!best) {
7206                 entry = find_entry(&entries, rec->start, rec->nr);
7207                 if (!entry && (!broken_entries || !rec->found_rec)) {
7208                         fprintf(stderr, "Backrefs don't agree with each other "
7209                                 "and extent record doesn't agree with anybody,"
7210                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7211                                 rec->start, rec->nr);
7212                         ret = -EINVAL;
7213                         goto out;
7214                 } else if (!entry) {
7215                         /*
7216                          * Ok our backrefs were broken, we'll assume this is the
7217                          * correct value and add an entry for this range.
7218                          */
7219                         entry = malloc(sizeof(struct extent_entry));
7220                         if (!entry) {
7221                                 ret = -ENOMEM;
7222                                 goto out;
7223                         }
7224                         memset(entry, 0, sizeof(*entry));
7225                         entry->bytenr = rec->start;
7226                         entry->bytes = rec->nr;
7227                         list_add_tail(&entry->list, &entries);
7228                         nr_entries++;
7229                 }
7230                 entry->count++;
7231                 best = find_most_right_entry(&entries);
7232                 if (!best) {
7233                         fprintf(stderr, "Backrefs and extent record evenly "
7234                                 "split on who is right, this is going to "
7235                                 "require user input to fix bytenr %Lu bytes "
7236                                 "%Lu\n", rec->start, rec->nr);
7237                         ret = -EINVAL;
7238                         goto out;
7239                 }
7240         }
7241
7242         /*
7243          * I don't think this can happen currently as we'll abort() if we catch
7244          * this case higher up, but in case somebody removes that we still can't
7245          * deal with it properly here yet, so just bail out of that's the case.
7246          */
7247         if (best->bytenr != rec->start) {
7248                 fprintf(stderr, "Extent start and backref starts don't match, "
7249                         "please use btrfs-image on this file system and send "
7250                         "it to a btrfs developer so they can make fsck fix "
7251                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7252                         rec->start, rec->nr);
7253                 ret = -EINVAL;
7254                 goto out;
7255         }
7256
7257         /*
7258          * Ok great we all agreed on an extent record, let's go find the real
7259          * references and fix up the ones that don't match.
7260          */
7261         list_for_each_entry(back, &rec->backrefs, list) {
7262                 if (back->full_backref || !back->is_data)
7263                         continue;
7264
7265                 dback = to_data_backref(back);
7266
7267                 /*
7268                  * Still ignoring backrefs that don't have a real ref attached
7269                  * to them.
7270                  */
7271                 if (dback->found_ref == 0)
7272                         continue;
7273
7274                 if (dback->bytes == best->bytes &&
7275                     dback->disk_bytenr == best->bytenr)
7276                         continue;
7277
7278                 ret = repair_ref(info, path, dback, best);
7279                 if (ret)
7280                         goto out;
7281         }
7282
7283         /*
7284          * Ok we messed with the actual refs, which means we need to drop our
7285          * entire cache and go back and rescan.  I know this is a huge pain and
7286          * adds a lot of extra work, but it's the only way to be safe.  Once all
7287          * the backrefs agree we may not need to do anything to the extent
7288          * record itself.
7289          */
7290         ret = -EAGAIN;
7291 out:
7292         while (!list_empty(&entries)) {
7293                 entry = list_entry(entries.next, struct extent_entry, list);
7294                 list_del_init(&entry->list);
7295                 free(entry);
7296         }
7297         return ret;
7298 }
7299
7300 static int process_duplicates(struct btrfs_root *root,
7301                               struct cache_tree *extent_cache,
7302                               struct extent_record *rec)
7303 {
7304         struct extent_record *good, *tmp;
7305         struct cache_extent *cache;
7306         int ret;
7307
7308         /*
7309          * If we found a extent record for this extent then return, or if we
7310          * have more than one duplicate we are likely going to need to delete
7311          * something.
7312          */
7313         if (rec->found_rec || rec->num_duplicates > 1)
7314                 return 0;
7315
7316         /* Shouldn't happen but just in case */
7317         BUG_ON(!rec->num_duplicates);
7318
7319         /*
7320          * So this happens if we end up with a backref that doesn't match the
7321          * actual extent entry.  So either the backref is bad or the extent
7322          * entry is bad.  Either way we want to have the extent_record actually
7323          * reflect what we found in the extent_tree, so we need to take the
7324          * duplicate out and use that as the extent_record since the only way we
7325          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7326          */
7327         remove_cache_extent(extent_cache, &rec->cache);
7328
7329         good = to_extent_record(rec->dups.next);
7330         list_del_init(&good->list);
7331         INIT_LIST_HEAD(&good->backrefs);
7332         INIT_LIST_HEAD(&good->dups);
7333         good->cache.start = good->start;
7334         good->cache.size = good->nr;
7335         good->content_checked = 0;
7336         good->owner_ref_checked = 0;
7337         good->num_duplicates = 0;
7338         good->refs = rec->refs;
7339         list_splice_init(&rec->backrefs, &good->backrefs);
7340         while (1) {
7341                 cache = lookup_cache_extent(extent_cache, good->start,
7342                                             good->nr);
7343                 if (!cache)
7344                         break;
7345                 tmp = container_of(cache, struct extent_record, cache);
7346
7347                 /*
7348                  * If we find another overlapping extent and it's found_rec is
7349                  * set then it's a duplicate and we need to try and delete
7350                  * something.
7351                  */
7352                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7353                         if (list_empty(&good->list))
7354                                 list_add_tail(&good->list,
7355                                               &duplicate_extents);
7356                         good->num_duplicates += tmp->num_duplicates + 1;
7357                         list_splice_init(&tmp->dups, &good->dups);
7358                         list_del_init(&tmp->list);
7359                         list_add_tail(&tmp->list, &good->dups);
7360                         remove_cache_extent(extent_cache, &tmp->cache);
7361                         continue;
7362                 }
7363
7364                 /*
7365                  * Ok we have another non extent item backed extent rec, so lets
7366                  * just add it to this extent and carry on like we did above.
7367                  */
7368                 good->refs += tmp->refs;
7369                 list_splice_init(&tmp->backrefs, &good->backrefs);
7370                 remove_cache_extent(extent_cache, &tmp->cache);
7371                 free(tmp);
7372         }
7373         ret = insert_cache_extent(extent_cache, &good->cache);
7374         BUG_ON(ret);
7375         free(rec);
7376         return good->num_duplicates ? 0 : 1;
7377 }
7378
7379 static int delete_duplicate_records(struct btrfs_root *root,
7380                                     struct extent_record *rec)
7381 {
7382         struct btrfs_trans_handle *trans;
7383         LIST_HEAD(delete_list);
7384         struct btrfs_path *path;
7385         struct extent_record *tmp, *good, *n;
7386         int nr_del = 0;
7387         int ret = 0, err;
7388         struct btrfs_key key;
7389
7390         path = btrfs_alloc_path();
7391         if (!path) {
7392                 ret = -ENOMEM;
7393                 goto out;
7394         }
7395
7396         good = rec;
7397         /* Find the record that covers all of the duplicates. */
7398         list_for_each_entry(tmp, &rec->dups, list) {
7399                 if (good->start < tmp->start)
7400                         continue;
7401                 if (good->nr > tmp->nr)
7402                         continue;
7403
7404                 if (tmp->start + tmp->nr < good->start + good->nr) {
7405                         fprintf(stderr, "Ok we have overlapping extents that "
7406                                 "aren't completely covered by each other, this "
7407                                 "is going to require more careful thought.  "
7408                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7409                                 tmp->start, tmp->nr, good->start, good->nr);
7410                         abort();
7411                 }
7412                 good = tmp;
7413         }
7414
7415         if (good != rec)
7416                 list_add_tail(&rec->list, &delete_list);
7417
7418         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7419                 if (tmp == good)
7420                         continue;
7421                 list_move_tail(&tmp->list, &delete_list);
7422         }
7423
7424         root = root->fs_info->extent_root;
7425         trans = btrfs_start_transaction(root, 1);
7426         if (IS_ERR(trans)) {
7427                 ret = PTR_ERR(trans);
7428                 goto out;
7429         }
7430
7431         list_for_each_entry(tmp, &delete_list, list) {
7432                 if (tmp->found_rec == 0)
7433                         continue;
7434                 key.objectid = tmp->start;
7435                 key.type = BTRFS_EXTENT_ITEM_KEY;
7436                 key.offset = tmp->nr;
7437
7438                 /* Shouldn't happen but just in case */
7439                 if (tmp->metadata) {
7440                         fprintf(stderr, "Well this shouldn't happen, extent "
7441                                 "record overlaps but is metadata? "
7442                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7443                         abort();
7444                 }
7445
7446                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7447                 if (ret) {
7448                         if (ret > 0)
7449                                 ret = -EINVAL;
7450                         break;
7451                 }
7452                 ret = btrfs_del_item(trans, root, path);
7453                 if (ret)
7454                         break;
7455                 btrfs_release_path(path);
7456                 nr_del++;
7457         }
7458         err = btrfs_commit_transaction(trans, root);
7459         if (err && !ret)
7460                 ret = err;
7461 out:
7462         while (!list_empty(&delete_list)) {
7463                 tmp = to_extent_record(delete_list.next);
7464                 list_del_init(&tmp->list);
7465                 if (tmp == rec)
7466                         continue;
7467                 free(tmp);
7468         }
7469
7470         while (!list_empty(&rec->dups)) {
7471                 tmp = to_extent_record(rec->dups.next);
7472                 list_del_init(&tmp->list);
7473                 free(tmp);
7474         }
7475
7476         btrfs_free_path(path);
7477
7478         if (!ret && !nr_del)
7479                 rec->num_duplicates = 0;
7480
7481         return ret ? ret : nr_del;
7482 }
7483
7484 static int find_possible_backrefs(struct btrfs_fs_info *info,
7485                                   struct btrfs_path *path,
7486                                   struct cache_tree *extent_cache,
7487                                   struct extent_record *rec)
7488 {
7489         struct btrfs_root *root;
7490         struct extent_backref *back;
7491         struct data_backref *dback;
7492         struct cache_extent *cache;
7493         struct btrfs_file_extent_item *fi;
7494         struct btrfs_key key;
7495         u64 bytenr, bytes;
7496         int ret;
7497
7498         list_for_each_entry(back, &rec->backrefs, list) {
7499                 /* Don't care about full backrefs (poor unloved backrefs) */
7500                 if (back->full_backref || !back->is_data)
7501                         continue;
7502
7503                 dback = to_data_backref(back);
7504
7505                 /* We found this one, we don't need to do a lookup */
7506                 if (dback->found_ref)
7507                         continue;
7508
7509                 key.objectid = dback->root;
7510                 key.type = BTRFS_ROOT_ITEM_KEY;
7511                 key.offset = (u64)-1;
7512
7513                 root = btrfs_read_fs_root(info, &key);
7514
7515                 /* No root, definitely a bad ref, skip */
7516                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7517                         continue;
7518                 /* Other err, exit */
7519                 if (IS_ERR(root))
7520                         return PTR_ERR(root);
7521
7522                 key.objectid = dback->owner;
7523                 key.type = BTRFS_EXTENT_DATA_KEY;
7524                 key.offset = dback->offset;
7525                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7526                 if (ret) {
7527                         btrfs_release_path(path);
7528                         if (ret < 0)
7529                                 return ret;
7530                         /* Didn't find it, we can carry on */
7531                         ret = 0;
7532                         continue;
7533                 }
7534
7535                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7536                                     struct btrfs_file_extent_item);
7537                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7538                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7539                 btrfs_release_path(path);
7540                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7541                 if (cache) {
7542                         struct extent_record *tmp;
7543                         tmp = container_of(cache, struct extent_record, cache);
7544
7545                         /*
7546                          * If we found an extent record for the bytenr for this
7547                          * particular backref then we can't add it to our
7548                          * current extent record.  We only want to add backrefs
7549                          * that don't have a corresponding extent item in the
7550                          * extent tree since they likely belong to this record
7551                          * and we need to fix it if it doesn't match bytenrs.
7552                          */
7553                         if  (tmp->found_rec)
7554                                 continue;
7555                 }
7556
7557                 dback->found_ref += 1;
7558                 dback->disk_bytenr = bytenr;
7559                 dback->bytes = bytes;
7560
7561                 /*
7562                  * Set this so the verify backref code knows not to trust the
7563                  * values in this backref.
7564                  */
7565                 back->broken = 1;
7566         }
7567
7568         return 0;
7569 }
7570
7571 /*
7572  * Record orphan data ref into corresponding root.
7573  *
7574  * Return 0 if the extent item contains data ref and recorded.
7575  * Return 1 if the extent item contains no useful data ref
7576  *   On that case, it may contains only shared_dataref or metadata backref
7577  *   or the file extent exists(this should be handled by the extent bytenr
7578  *   recovery routine)
7579  * Return <0 if something goes wrong.
7580  */
7581 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7582                                       struct extent_record *rec)
7583 {
7584         struct btrfs_key key;
7585         struct btrfs_root *dest_root;
7586         struct extent_backref *back;
7587         struct data_backref *dback;
7588         struct orphan_data_extent *orphan;
7589         struct btrfs_path *path;
7590         int recorded_data_ref = 0;
7591         int ret = 0;
7592
7593         if (rec->metadata)
7594                 return 1;
7595         path = btrfs_alloc_path();
7596         if (!path)
7597                 return -ENOMEM;
7598         list_for_each_entry(back, &rec->backrefs, list) {
7599                 if (back->full_backref || !back->is_data ||
7600                     !back->found_extent_tree)
7601                         continue;
7602                 dback = to_data_backref(back);
7603                 if (dback->found_ref)
7604                         continue;
7605                 key.objectid = dback->root;
7606                 key.type = BTRFS_ROOT_ITEM_KEY;
7607                 key.offset = (u64)-1;
7608
7609                 dest_root = btrfs_read_fs_root(fs_info, &key);
7610
7611                 /* For non-exist root we just skip it */
7612                 if (IS_ERR(dest_root) || !dest_root)
7613                         continue;
7614
7615                 key.objectid = dback->owner;
7616                 key.type = BTRFS_EXTENT_DATA_KEY;
7617                 key.offset = dback->offset;
7618
7619                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7620                 /*
7621                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7622                  * we need to record it for inode/file extent rebuild.
7623                  * For ret > 0, we record it only for file extent rebuild.
7624                  * For ret == 0, the file extent exists but only bytenr
7625                  * mismatch, let the original bytenr fix routine to handle,
7626                  * don't record it.
7627                  */
7628                 if (ret == 0)
7629                         continue;
7630                 ret = 0;
7631                 orphan = malloc(sizeof(*orphan));
7632                 if (!orphan) {
7633                         ret = -ENOMEM;
7634                         goto out;
7635                 }
7636                 INIT_LIST_HEAD(&orphan->list);
7637                 orphan->root = dback->root;
7638                 orphan->objectid = dback->owner;
7639                 orphan->offset = dback->offset;
7640                 orphan->disk_bytenr = rec->cache.start;
7641                 orphan->disk_len = rec->cache.size;
7642                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7643                 recorded_data_ref = 1;
7644         }
7645 out:
7646         btrfs_free_path(path);
7647         if (!ret)
7648                 return !recorded_data_ref;
7649         else
7650                 return ret;
7651 }
7652
7653 /*
7654  * when an incorrect extent item is found, this will delete
7655  * all of the existing entries for it and recreate them
7656  * based on what the tree scan found.
7657  */
7658 static int fixup_extent_refs(struct btrfs_fs_info *info,
7659                              struct cache_tree *extent_cache,
7660                              struct extent_record *rec)
7661 {
7662         struct btrfs_trans_handle *trans = NULL;
7663         int ret;
7664         struct btrfs_path *path;
7665         struct list_head *cur = rec->backrefs.next;
7666         struct cache_extent *cache;
7667         struct extent_backref *back;
7668         int allocated = 0;
7669         u64 flags = 0;
7670
7671         if (rec->flag_block_full_backref)
7672                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7673
7674         path = btrfs_alloc_path();
7675         if (!path)
7676                 return -ENOMEM;
7677
7678         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7679                 /*
7680                  * Sometimes the backrefs themselves are so broken they don't
7681                  * get attached to any meaningful rec, so first go back and
7682                  * check any of our backrefs that we couldn't find and throw
7683                  * them into the list if we find the backref so that
7684                  * verify_backrefs can figure out what to do.
7685                  */
7686                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7687                 if (ret < 0)
7688                         goto out;
7689         }
7690
7691         /* step one, make sure all of the backrefs agree */
7692         ret = verify_backrefs(info, path, rec);
7693         if (ret < 0)
7694                 goto out;
7695
7696         trans = btrfs_start_transaction(info->extent_root, 1);
7697         if (IS_ERR(trans)) {
7698                 ret = PTR_ERR(trans);
7699                 goto out;
7700         }
7701
7702         /* step two, delete all the existing records */
7703         ret = delete_extent_records(trans, info->extent_root, path,
7704                                     rec->start, rec->max_size);
7705
7706         if (ret < 0)
7707                 goto out;
7708
7709         /* was this block corrupt?  If so, don't add references to it */
7710         cache = lookup_cache_extent(info->corrupt_blocks,
7711                                     rec->start, rec->max_size);
7712         if (cache) {
7713                 ret = 0;
7714                 goto out;
7715         }
7716
7717         /* step three, recreate all the refs we did find */
7718         while(cur != &rec->backrefs) {
7719                 back = to_extent_backref(cur);
7720                 cur = cur->next;
7721
7722                 /*
7723                  * if we didn't find any references, don't create a
7724                  * new extent record
7725                  */
7726                 if (!back->found_ref)
7727                         continue;
7728
7729                 rec->bad_full_backref = 0;
7730                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7731                 allocated = 1;
7732
7733                 if (ret)
7734                         goto out;
7735         }
7736 out:
7737         if (trans) {
7738                 int err = btrfs_commit_transaction(trans, info->extent_root);
7739                 if (!ret)
7740                         ret = err;
7741         }
7742
7743         btrfs_free_path(path);
7744         return ret;
7745 }
7746
7747 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7748                               struct extent_record *rec)
7749 {
7750         struct btrfs_trans_handle *trans;
7751         struct btrfs_root *root = fs_info->extent_root;
7752         struct btrfs_path *path;
7753         struct btrfs_extent_item *ei;
7754         struct btrfs_key key;
7755         u64 flags;
7756         int ret = 0;
7757
7758         key.objectid = rec->start;
7759         if (rec->metadata) {
7760                 key.type = BTRFS_METADATA_ITEM_KEY;
7761                 key.offset = rec->info_level;
7762         } else {
7763                 key.type = BTRFS_EXTENT_ITEM_KEY;
7764                 key.offset = rec->max_size;
7765         }
7766
7767         path = btrfs_alloc_path();
7768         if (!path)
7769                 return -ENOMEM;
7770
7771         trans = btrfs_start_transaction(root, 0);
7772         if (IS_ERR(trans)) {
7773                 btrfs_free_path(path);
7774                 return PTR_ERR(trans);
7775         }
7776
7777         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7778         if (ret < 0) {
7779                 btrfs_free_path(path);
7780                 btrfs_commit_transaction(trans, root);
7781                 return ret;
7782         } else if (ret) {
7783                 fprintf(stderr, "Didn't find extent for %llu\n",
7784                         (unsigned long long)rec->start);
7785                 btrfs_free_path(path);
7786                 btrfs_commit_transaction(trans, root);
7787                 return -ENOENT;
7788         }
7789
7790         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7791                             struct btrfs_extent_item);
7792         flags = btrfs_extent_flags(path->nodes[0], ei);
7793         if (rec->flag_block_full_backref) {
7794                 fprintf(stderr, "setting full backref on %llu\n",
7795                         (unsigned long long)key.objectid);
7796                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7797         } else {
7798                 fprintf(stderr, "clearing full backref on %llu\n",
7799                         (unsigned long long)key.objectid);
7800                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7801         }
7802         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7803         btrfs_mark_buffer_dirty(path->nodes[0]);
7804         btrfs_free_path(path);
7805         return btrfs_commit_transaction(trans, root);
7806 }
7807
7808 /* right now we only prune from the extent allocation tree */
7809 static int prune_one_block(struct btrfs_trans_handle *trans,
7810                            struct btrfs_fs_info *info,
7811                            struct btrfs_corrupt_block *corrupt)
7812 {
7813         int ret;
7814         struct btrfs_path path;
7815         struct extent_buffer *eb;
7816         u64 found;
7817         int slot;
7818         int nritems;
7819         int level = corrupt->level + 1;
7820
7821         btrfs_init_path(&path);
7822 again:
7823         /* we want to stop at the parent to our busted block */
7824         path.lowest_level = level;
7825
7826         ret = btrfs_search_slot(trans, info->extent_root,
7827                                 &corrupt->key, &path, -1, 1);
7828
7829         if (ret < 0)
7830                 goto out;
7831
7832         eb = path.nodes[level];
7833         if (!eb) {
7834                 ret = -ENOENT;
7835                 goto out;
7836         }
7837
7838         /*
7839          * hopefully the search gave us the block we want to prune,
7840          * lets try that first
7841          */
7842         slot = path.slots[level];
7843         found =  btrfs_node_blockptr(eb, slot);
7844         if (found == corrupt->cache.start)
7845                 goto del_ptr;
7846
7847         nritems = btrfs_header_nritems(eb);
7848
7849         /* the search failed, lets scan this node and hope we find it */
7850         for (slot = 0; slot < nritems; slot++) {
7851                 found =  btrfs_node_blockptr(eb, slot);
7852                 if (found == corrupt->cache.start)
7853                         goto del_ptr;
7854         }
7855         /*
7856          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7857          * to this block
7858          */
7859         if (eb == info->extent_root->node) {
7860                 ret = -ENOENT;
7861                 goto out;
7862         } else {
7863                 level++;
7864                 btrfs_release_path(&path);
7865                 goto again;
7866         }
7867
7868 del_ptr:
7869         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7870         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7871
7872 out:
7873         btrfs_release_path(&path);
7874         return ret;
7875 }
7876
7877 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7878 {
7879         struct btrfs_trans_handle *trans = NULL;
7880         struct cache_extent *cache;
7881         struct btrfs_corrupt_block *corrupt;
7882
7883         while (1) {
7884                 cache = search_cache_extent(info->corrupt_blocks, 0);
7885                 if (!cache)
7886                         break;
7887                 if (!trans) {
7888                         trans = btrfs_start_transaction(info->extent_root, 1);
7889                         if (IS_ERR(trans))
7890                                 return PTR_ERR(trans);
7891                 }
7892                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7893                 prune_one_block(trans, info, corrupt);
7894                 remove_cache_extent(info->corrupt_blocks, cache);
7895         }
7896         if (trans)
7897                 return btrfs_commit_transaction(trans, info->extent_root);
7898         return 0;
7899 }
7900
7901 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7902 {
7903         struct btrfs_block_group_cache *cache;
7904         u64 start, end;
7905         int ret;
7906
7907         while (1) {
7908                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7909                                             &start, &end, EXTENT_DIRTY);
7910                 if (ret)
7911                         break;
7912                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7913                                    GFP_NOFS);
7914         }
7915
7916         start = 0;
7917         while (1) {
7918                 cache = btrfs_lookup_first_block_group(fs_info, start);
7919                 if (!cache)
7920                         break;
7921                 if (cache->cached)
7922                         cache->cached = 0;
7923                 start = cache->key.objectid + cache->key.offset;
7924         }
7925 }
7926
7927 static int check_extent_refs(struct btrfs_root *root,
7928                              struct cache_tree *extent_cache)
7929 {
7930         struct extent_record *rec;
7931         struct cache_extent *cache;
7932         int err = 0;
7933         int ret = 0;
7934         int fixed = 0;
7935         int had_dups = 0;
7936         int recorded = 0;
7937
7938         if (repair) {
7939                 /*
7940                  * if we're doing a repair, we have to make sure
7941                  * we don't allocate from the problem extents.
7942                  * In the worst case, this will be all the
7943                  * extents in the FS
7944                  */
7945                 cache = search_cache_extent(extent_cache, 0);
7946                 while(cache) {
7947                         rec = container_of(cache, struct extent_record, cache);
7948                         set_extent_dirty(root->fs_info->excluded_extents,
7949                                          rec->start,
7950                                          rec->start + rec->max_size - 1,
7951                                          GFP_NOFS);
7952                         cache = next_cache_extent(cache);
7953                 }
7954
7955                 /* pin down all the corrupted blocks too */
7956                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7957                 while(cache) {
7958                         set_extent_dirty(root->fs_info->excluded_extents,
7959                                          cache->start,
7960                                          cache->start + cache->size - 1,
7961                                          GFP_NOFS);
7962                         cache = next_cache_extent(cache);
7963                 }
7964                 prune_corrupt_blocks(root->fs_info);
7965                 reset_cached_block_groups(root->fs_info);
7966         }
7967
7968         reset_cached_block_groups(root->fs_info);
7969
7970         /*
7971          * We need to delete any duplicate entries we find first otherwise we
7972          * could mess up the extent tree when we have backrefs that actually
7973          * belong to a different extent item and not the weird duplicate one.
7974          */
7975         while (repair && !list_empty(&duplicate_extents)) {
7976                 rec = to_extent_record(duplicate_extents.next);
7977                 list_del_init(&rec->list);
7978
7979                 /* Sometimes we can find a backref before we find an actual
7980                  * extent, so we need to process it a little bit to see if there
7981                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7982                  * if this is a backref screwup.  If we need to delete stuff
7983                  * process_duplicates() will return 0, otherwise it will return
7984                  * 1 and we
7985                  */
7986                 if (process_duplicates(root, extent_cache, rec))
7987                         continue;
7988                 ret = delete_duplicate_records(root, rec);
7989                 if (ret < 0)
7990                         return ret;
7991                 /*
7992                  * delete_duplicate_records will return the number of entries
7993                  * deleted, so if it's greater than 0 then we know we actually
7994                  * did something and we need to remove.
7995                  */
7996                 if (ret)
7997                         had_dups = 1;
7998         }
7999
8000         if (had_dups)
8001                 return -EAGAIN;
8002
8003         while(1) {
8004                 int cur_err = 0;
8005
8006                 fixed = 0;
8007                 recorded = 0;
8008                 cache = search_cache_extent(extent_cache, 0);
8009                 if (!cache)
8010                         break;
8011                 rec = container_of(cache, struct extent_record, cache);
8012                 if (rec->num_duplicates) {
8013                         fprintf(stderr, "extent item %llu has multiple extent "
8014                                 "items\n", (unsigned long long)rec->start);
8015                         err = 1;
8016                         cur_err = 1;
8017                 }
8018
8019                 if (rec->refs != rec->extent_item_refs) {
8020                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
8021                                 (unsigned long long)rec->start,
8022                                 (unsigned long long)rec->nr);
8023                         fprintf(stderr, "extent item %llu, found %llu\n",
8024                                 (unsigned long long)rec->extent_item_refs,
8025                                 (unsigned long long)rec->refs);
8026                         ret = record_orphan_data_extents(root->fs_info, rec);
8027                         if (ret < 0)
8028                                 goto repair_abort;
8029                         if (ret == 0) {
8030                                 recorded = 1;
8031                         } else {
8032                                 /*
8033                                  * we can't use the extent to repair file
8034                                  * extent, let the fallback method handle it.
8035                                  */
8036                                 if (!fixed && repair) {
8037                                         ret = fixup_extent_refs(
8038                                                         root->fs_info,
8039                                                         extent_cache, rec);
8040                                         if (ret)
8041                                                 goto repair_abort;
8042                                         fixed = 1;
8043                                 }
8044                         }
8045                         err = 1;
8046                         cur_err = 1;
8047                 }
8048                 if (all_backpointers_checked(rec, 1)) {
8049                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
8050                                 (unsigned long long)rec->start,
8051                                 (unsigned long long)rec->nr);
8052
8053                         if (!fixed && !recorded && repair) {
8054                                 ret = fixup_extent_refs(root->fs_info,
8055                                                         extent_cache, rec);
8056                                 if (ret)
8057                                         goto repair_abort;
8058                                 fixed = 1;
8059                         }
8060                         cur_err = 1;
8061                         err = 1;
8062                 }
8063                 if (!rec->owner_ref_checked) {
8064                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
8065                                 (unsigned long long)rec->start,
8066                                 (unsigned long long)rec->nr);
8067                         if (!fixed && !recorded && repair) {
8068                                 ret = fixup_extent_refs(root->fs_info,
8069                                                         extent_cache, rec);
8070                                 if (ret)
8071                                         goto repair_abort;
8072                                 fixed = 1;
8073                         }
8074                         err = 1;
8075                         cur_err = 1;
8076                 }
8077                 if (rec->bad_full_backref) {
8078                         fprintf(stderr, "bad full backref, on [%llu]\n",
8079                                 (unsigned long long)rec->start);
8080                         if (repair) {
8081                                 ret = fixup_extent_flags(root->fs_info, rec);
8082                                 if (ret)
8083                                         goto repair_abort;
8084                                 fixed = 1;
8085                         }
8086                         err = 1;
8087                         cur_err = 1;
8088                 }
8089                 /*
8090                  * Although it's not a extent ref's problem, we reuse this
8091                  * routine for error reporting.
8092                  * No repair function yet.
8093                  */
8094                 if (rec->crossing_stripes) {
8095                         fprintf(stderr,
8096                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8097                                 rec->start, rec->start + rec->max_size);
8098                         err = 1;
8099                         cur_err = 1;
8100                 }
8101
8102                 if (rec->wrong_chunk_type) {
8103                         fprintf(stderr,
8104                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
8105                                 rec->start, rec->start + rec->max_size);
8106                         err = 1;
8107                         cur_err = 1;
8108                 }
8109
8110                 remove_cache_extent(extent_cache, cache);
8111                 free_all_extent_backrefs(rec);
8112                 if (!init_extent_tree && repair && (!cur_err || fixed))
8113                         clear_extent_dirty(root->fs_info->excluded_extents,
8114                                            rec->start,
8115                                            rec->start + rec->max_size - 1,
8116                                            GFP_NOFS);
8117                 free(rec);
8118         }
8119 repair_abort:
8120         if (repair) {
8121                 if (ret && ret != -EAGAIN) {
8122                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8123                         exit(1);
8124                 } else if (!ret) {
8125                         struct btrfs_trans_handle *trans;
8126
8127                         root = root->fs_info->extent_root;
8128                         trans = btrfs_start_transaction(root, 1);
8129                         if (IS_ERR(trans)) {
8130                                 ret = PTR_ERR(trans);
8131                                 goto repair_abort;
8132                         }
8133
8134                         btrfs_fix_block_accounting(trans, root);
8135                         ret = btrfs_commit_transaction(trans, root);
8136                         if (ret)
8137                                 goto repair_abort;
8138                 }
8139                 if (err)
8140                         fprintf(stderr, "repaired damaged extent references\n");
8141                 return ret;
8142         }
8143         return err;
8144 }
8145
8146 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8147 {
8148         u64 stripe_size;
8149
8150         if (type & BTRFS_BLOCK_GROUP_RAID0) {
8151                 stripe_size = length;
8152                 stripe_size /= num_stripes;
8153         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8154                 stripe_size = length * 2;
8155                 stripe_size /= num_stripes;
8156         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8157                 stripe_size = length;
8158                 stripe_size /= (num_stripes - 1);
8159         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8160                 stripe_size = length;
8161                 stripe_size /= (num_stripes - 2);
8162         } else {
8163                 stripe_size = length;
8164         }
8165         return stripe_size;
8166 }
8167
8168 /*
8169  * Check the chunk with its block group/dev list ref:
8170  * Return 0 if all refs seems valid.
8171  * Return 1 if part of refs seems valid, need later check for rebuild ref
8172  * like missing block group and needs to search extent tree to rebuild them.
8173  * Return -1 if essential refs are missing and unable to rebuild.
8174  */
8175 static int check_chunk_refs(struct chunk_record *chunk_rec,
8176                             struct block_group_tree *block_group_cache,
8177                             struct device_extent_tree *dev_extent_cache,
8178                             int silent)
8179 {
8180         struct cache_extent *block_group_item;
8181         struct block_group_record *block_group_rec;
8182         struct cache_extent *dev_extent_item;
8183         struct device_extent_record *dev_extent_rec;
8184         u64 devid;
8185         u64 offset;
8186         u64 length;
8187         int metadump_v2 = 0;
8188         int i;
8189         int ret = 0;
8190
8191         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8192                                                chunk_rec->offset,
8193                                                chunk_rec->length);
8194         if (block_group_item) {
8195                 block_group_rec = container_of(block_group_item,
8196                                                struct block_group_record,
8197                                                cache);
8198                 if (chunk_rec->length != block_group_rec->offset ||
8199                     chunk_rec->offset != block_group_rec->objectid ||
8200                     (!metadump_v2 &&
8201                      chunk_rec->type_flags != block_group_rec->flags)) {
8202                         if (!silent)
8203                                 fprintf(stderr,
8204                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8205                                         chunk_rec->objectid,
8206                                         chunk_rec->type,
8207                                         chunk_rec->offset,
8208                                         chunk_rec->length,
8209                                         chunk_rec->offset,
8210                                         chunk_rec->type_flags,
8211                                         block_group_rec->objectid,
8212                                         block_group_rec->type,
8213                                         block_group_rec->offset,
8214                                         block_group_rec->offset,
8215                                         block_group_rec->objectid,
8216                                         block_group_rec->flags);
8217                         ret = -1;
8218                 } else {
8219                         list_del_init(&block_group_rec->list);
8220                         chunk_rec->bg_rec = block_group_rec;
8221                 }
8222         } else {
8223                 if (!silent)
8224                         fprintf(stderr,
8225                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8226                                 chunk_rec->objectid,
8227                                 chunk_rec->type,
8228                                 chunk_rec->offset,
8229                                 chunk_rec->length,
8230                                 chunk_rec->offset,
8231                                 chunk_rec->type_flags);
8232                 ret = 1;
8233         }
8234
8235         if (metadump_v2)
8236                 return ret;
8237
8238         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8239                                     chunk_rec->num_stripes);
8240         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8241                 devid = chunk_rec->stripes[i].devid;
8242                 offset = chunk_rec->stripes[i].offset;
8243                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8244                                                        devid, offset, length);
8245                 if (dev_extent_item) {
8246                         dev_extent_rec = container_of(dev_extent_item,
8247                                                 struct device_extent_record,
8248                                                 cache);
8249                         if (dev_extent_rec->objectid != devid ||
8250                             dev_extent_rec->offset != offset ||
8251                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8252                             dev_extent_rec->length != length) {
8253                                 if (!silent)
8254                                         fprintf(stderr,
8255                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8256                                                 chunk_rec->objectid,
8257                                                 chunk_rec->type,
8258                                                 chunk_rec->offset,
8259                                                 chunk_rec->stripes[i].devid,
8260                                                 chunk_rec->stripes[i].offset,
8261                                                 dev_extent_rec->objectid,
8262                                                 dev_extent_rec->offset,
8263                                                 dev_extent_rec->length);
8264                                 ret = -1;
8265                         } else {
8266                                 list_move(&dev_extent_rec->chunk_list,
8267                                           &chunk_rec->dextents);
8268                         }
8269                 } else {
8270                         if (!silent)
8271                                 fprintf(stderr,
8272                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8273                                         chunk_rec->objectid,
8274                                         chunk_rec->type,
8275                                         chunk_rec->offset,
8276                                         chunk_rec->stripes[i].devid,
8277                                         chunk_rec->stripes[i].offset);
8278                         ret = -1;
8279                 }
8280         }
8281         return ret;
8282 }
8283
8284 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8285 int check_chunks(struct cache_tree *chunk_cache,
8286                  struct block_group_tree *block_group_cache,
8287                  struct device_extent_tree *dev_extent_cache,
8288                  struct list_head *good, struct list_head *bad,
8289                  struct list_head *rebuild, int silent)
8290 {
8291         struct cache_extent *chunk_item;
8292         struct chunk_record *chunk_rec;
8293         struct block_group_record *bg_rec;
8294         struct device_extent_record *dext_rec;
8295         int err;
8296         int ret = 0;
8297
8298         chunk_item = first_cache_extent(chunk_cache);
8299         while (chunk_item) {
8300                 chunk_rec = container_of(chunk_item, struct chunk_record,
8301                                          cache);
8302                 err = check_chunk_refs(chunk_rec, block_group_cache,
8303                                        dev_extent_cache, silent);
8304                 if (err < 0)
8305                         ret = err;
8306                 if (err == 0 && good)
8307                         list_add_tail(&chunk_rec->list, good);
8308                 if (err > 0 && rebuild)
8309                         list_add_tail(&chunk_rec->list, rebuild);
8310                 if (err < 0 && bad)
8311                         list_add_tail(&chunk_rec->list, bad);
8312                 chunk_item = next_cache_extent(chunk_item);
8313         }
8314
8315         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8316                 if (!silent)
8317                         fprintf(stderr,
8318                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8319                                 bg_rec->objectid,
8320                                 bg_rec->offset,
8321                                 bg_rec->flags);
8322                 if (!ret)
8323                         ret = 1;
8324         }
8325
8326         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8327                             chunk_list) {
8328                 if (!silent)
8329                         fprintf(stderr,
8330                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8331                                 dext_rec->objectid,
8332                                 dext_rec->offset,
8333                                 dext_rec->length);
8334                 if (!ret)
8335                         ret = 1;
8336         }
8337         return ret;
8338 }
8339
8340
8341 static int check_device_used(struct device_record *dev_rec,
8342                              struct device_extent_tree *dext_cache)
8343 {
8344         struct cache_extent *cache;
8345         struct device_extent_record *dev_extent_rec;
8346         u64 total_byte = 0;
8347
8348         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8349         while (cache) {
8350                 dev_extent_rec = container_of(cache,
8351                                               struct device_extent_record,
8352                                               cache);
8353                 if (dev_extent_rec->objectid != dev_rec->devid)
8354                         break;
8355
8356                 list_del_init(&dev_extent_rec->device_list);
8357                 total_byte += dev_extent_rec->length;
8358                 cache = next_cache_extent(cache);
8359         }
8360
8361         if (total_byte != dev_rec->byte_used) {
8362                 fprintf(stderr,
8363                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8364                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8365                         dev_rec->type, dev_rec->offset);
8366                 return -1;
8367         } else {
8368                 return 0;
8369         }
8370 }
8371
8372 /* check btrfs_dev_item -> btrfs_dev_extent */
8373 static int check_devices(struct rb_root *dev_cache,
8374                          struct device_extent_tree *dev_extent_cache)
8375 {
8376         struct rb_node *dev_node;
8377         struct device_record *dev_rec;
8378         struct device_extent_record *dext_rec;
8379         int err;
8380         int ret = 0;
8381
8382         dev_node = rb_first(dev_cache);
8383         while (dev_node) {
8384                 dev_rec = container_of(dev_node, struct device_record, node);
8385                 err = check_device_used(dev_rec, dev_extent_cache);
8386                 if (err)
8387                         ret = err;
8388
8389                 dev_node = rb_next(dev_node);
8390         }
8391         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8392                             device_list) {
8393                 fprintf(stderr,
8394                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8395                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8396                 if (!ret)
8397                         ret = 1;
8398         }
8399         return ret;
8400 }
8401
8402 static int add_root_item_to_list(struct list_head *head,
8403                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8404                                   u8 level, u8 drop_level,
8405                                   int level_size, struct btrfs_key *drop_key)
8406 {
8407
8408         struct root_item_record *ri_rec;
8409         ri_rec = malloc(sizeof(*ri_rec));
8410         if (!ri_rec)
8411                 return -ENOMEM;
8412         ri_rec->bytenr = bytenr;
8413         ri_rec->objectid = objectid;
8414         ri_rec->level = level;
8415         ri_rec->level_size = level_size;
8416         ri_rec->drop_level = drop_level;
8417         ri_rec->last_snapshot = last_snapshot;
8418         if (drop_key)
8419                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8420         list_add_tail(&ri_rec->list, head);
8421
8422         return 0;
8423 }
8424
8425 static void free_root_item_list(struct list_head *list)
8426 {
8427         struct root_item_record *ri_rec;
8428
8429         while (!list_empty(list)) {
8430                 ri_rec = list_first_entry(list, struct root_item_record,
8431                                           list);
8432                 list_del_init(&ri_rec->list);
8433                 free(ri_rec);
8434         }
8435 }
8436
8437 static int deal_root_from_list(struct list_head *list,
8438                                struct btrfs_root *root,
8439                                struct block_info *bits,
8440                                int bits_nr,
8441                                struct cache_tree *pending,
8442                                struct cache_tree *seen,
8443                                struct cache_tree *reada,
8444                                struct cache_tree *nodes,
8445                                struct cache_tree *extent_cache,
8446                                struct cache_tree *chunk_cache,
8447                                struct rb_root *dev_cache,
8448                                struct block_group_tree *block_group_cache,
8449                                struct device_extent_tree *dev_extent_cache)
8450 {
8451         int ret = 0;
8452         u64 last;
8453
8454         while (!list_empty(list)) {
8455                 struct root_item_record *rec;
8456                 struct extent_buffer *buf;
8457                 rec = list_entry(list->next,
8458                                  struct root_item_record, list);
8459                 last = 0;
8460                 buf = read_tree_block(root->fs_info->tree_root,
8461                                       rec->bytenr, rec->level_size, 0);
8462                 if (!extent_buffer_uptodate(buf)) {
8463                         free_extent_buffer(buf);
8464                         ret = -EIO;
8465                         break;
8466                 }
8467                 ret = add_root_to_pending(buf, extent_cache, pending,
8468                                     seen, nodes, rec->objectid);
8469                 if (ret < 0)
8470                         break;
8471                 /*
8472                  * To rebuild extent tree, we need deal with snapshot
8473                  * one by one, otherwise we deal with node firstly which
8474                  * can maximize readahead.
8475                  */
8476                 while (1) {
8477                         ret = run_next_block(root, bits, bits_nr, &last,
8478                                              pending, seen, reada, nodes,
8479                                              extent_cache, chunk_cache,
8480                                              dev_cache, block_group_cache,
8481                                              dev_extent_cache, rec);
8482                         if (ret != 0)
8483                                 break;
8484                 }
8485                 free_extent_buffer(buf);
8486                 list_del(&rec->list);
8487                 free(rec);
8488                 if (ret < 0)
8489                         break;
8490         }
8491         while (ret >= 0) {
8492                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8493                                      reada, nodes, extent_cache, chunk_cache,
8494                                      dev_cache, block_group_cache,
8495                                      dev_extent_cache, NULL);
8496                 if (ret != 0) {
8497                         if (ret > 0)
8498                                 ret = 0;
8499                         break;
8500                 }
8501         }
8502         return ret;
8503 }
8504
8505 static int check_chunks_and_extents(struct btrfs_root *root)
8506 {
8507         struct rb_root dev_cache;
8508         struct cache_tree chunk_cache;
8509         struct block_group_tree block_group_cache;
8510         struct device_extent_tree dev_extent_cache;
8511         struct cache_tree extent_cache;
8512         struct cache_tree seen;
8513         struct cache_tree pending;
8514         struct cache_tree reada;
8515         struct cache_tree nodes;
8516         struct extent_io_tree excluded_extents;
8517         struct cache_tree corrupt_blocks;
8518         struct btrfs_path path;
8519         struct btrfs_key key;
8520         struct btrfs_key found_key;
8521         int ret, err = 0;
8522         struct block_info *bits;
8523         int bits_nr;
8524         struct extent_buffer *leaf;
8525         int slot;
8526         struct btrfs_root_item ri;
8527         struct list_head dropping_trees;
8528         struct list_head normal_trees;
8529         struct btrfs_root *root1;
8530         u64 objectid;
8531         u32 level_size;
8532         u8 level;
8533
8534         dev_cache = RB_ROOT;
8535         cache_tree_init(&chunk_cache);
8536         block_group_tree_init(&block_group_cache);
8537         device_extent_tree_init(&dev_extent_cache);
8538
8539         cache_tree_init(&extent_cache);
8540         cache_tree_init(&seen);
8541         cache_tree_init(&pending);
8542         cache_tree_init(&nodes);
8543         cache_tree_init(&reada);
8544         cache_tree_init(&corrupt_blocks);
8545         extent_io_tree_init(&excluded_extents);
8546         INIT_LIST_HEAD(&dropping_trees);
8547         INIT_LIST_HEAD(&normal_trees);
8548
8549         if (repair) {
8550                 root->fs_info->excluded_extents = &excluded_extents;
8551                 root->fs_info->fsck_extent_cache = &extent_cache;
8552                 root->fs_info->free_extent_hook = free_extent_hook;
8553                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8554         }
8555
8556         bits_nr = 1024;
8557         bits = malloc(bits_nr * sizeof(struct block_info));
8558         if (!bits) {
8559                 perror("malloc");
8560                 exit(1);
8561         }
8562
8563         if (ctx.progress_enabled) {
8564                 ctx.tp = TASK_EXTENTS;
8565                 task_start(ctx.info);
8566         }
8567
8568 again:
8569         root1 = root->fs_info->tree_root;
8570         level = btrfs_header_level(root1->node);
8571         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8572                                     root1->node->start, 0, level, 0,
8573                                     root1->nodesize, NULL);
8574         if (ret < 0)
8575                 goto out;
8576         root1 = root->fs_info->chunk_root;
8577         level = btrfs_header_level(root1->node);
8578         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8579                                     root1->node->start, 0, level, 0,
8580                                     root1->nodesize, NULL);
8581         if (ret < 0)
8582                 goto out;
8583         btrfs_init_path(&path);
8584         key.offset = 0;
8585         key.objectid = 0;
8586         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8587         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8588                                         &key, &path, 0, 0);
8589         if (ret < 0)
8590                 goto out;
8591         while(1) {
8592                 leaf = path.nodes[0];
8593                 slot = path.slots[0];
8594                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8595                         ret = btrfs_next_leaf(root, &path);
8596                         if (ret != 0)
8597                                 break;
8598                         leaf = path.nodes[0];
8599                         slot = path.slots[0];
8600                 }
8601                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8602                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8603                         unsigned long offset;
8604                         u64 last_snapshot;
8605
8606                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8607                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8608                         last_snapshot = btrfs_root_last_snapshot(&ri);
8609                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8610                                 level = btrfs_root_level(&ri);
8611                                 level_size = root->nodesize;
8612                                 ret = add_root_item_to_list(&normal_trees,
8613                                                 found_key.objectid,
8614                                                 btrfs_root_bytenr(&ri),
8615                                                 last_snapshot, level,
8616                                                 0, level_size, NULL);
8617                                 if (ret < 0)
8618                                         goto out;
8619                         } else {
8620                                 level = btrfs_root_level(&ri);
8621                                 level_size = root->nodesize;
8622                                 objectid = found_key.objectid;
8623                                 btrfs_disk_key_to_cpu(&found_key,
8624                                                       &ri.drop_progress);
8625                                 ret = add_root_item_to_list(&dropping_trees,
8626                                                 objectid,
8627                                                 btrfs_root_bytenr(&ri),
8628                                                 last_snapshot, level,
8629                                                 ri.drop_level,
8630                                                 level_size, &found_key);
8631                                 if (ret < 0)
8632                                         goto out;
8633                         }
8634                 }
8635                 path.slots[0]++;
8636         }
8637         btrfs_release_path(&path);
8638
8639         /*
8640          * check_block can return -EAGAIN if it fixes something, please keep
8641          * this in mind when dealing with return values from these functions, if
8642          * we get -EAGAIN we want to fall through and restart the loop.
8643          */
8644         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8645                                   &seen, &reada, &nodes, &extent_cache,
8646                                   &chunk_cache, &dev_cache, &block_group_cache,
8647                                   &dev_extent_cache);
8648         if (ret < 0) {
8649                 if (ret == -EAGAIN)
8650                         goto loop;
8651                 goto out;
8652         }
8653         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8654                                   &pending, &seen, &reada, &nodes,
8655                                   &extent_cache, &chunk_cache, &dev_cache,
8656                                   &block_group_cache, &dev_extent_cache);
8657         if (ret < 0) {
8658                 if (ret == -EAGAIN)
8659                         goto loop;
8660                 goto out;
8661         }
8662
8663         ret = check_chunks(&chunk_cache, &block_group_cache,
8664                            &dev_extent_cache, NULL, NULL, NULL, 0);
8665         if (ret) {
8666                 if (ret == -EAGAIN)
8667                         goto loop;
8668                 err = ret;
8669         }
8670
8671         ret = check_extent_refs(root, &extent_cache);
8672         if (ret < 0) {
8673                 if (ret == -EAGAIN)
8674                         goto loop;
8675                 goto out;
8676         }
8677
8678         ret = check_devices(&dev_cache, &dev_extent_cache);
8679         if (ret && err)
8680                 ret = err;
8681
8682 out:
8683         task_stop(ctx.info);
8684         if (repair) {
8685                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8686                 extent_io_tree_cleanup(&excluded_extents);
8687                 root->fs_info->fsck_extent_cache = NULL;
8688                 root->fs_info->free_extent_hook = NULL;
8689                 root->fs_info->corrupt_blocks = NULL;
8690                 root->fs_info->excluded_extents = NULL;
8691         }
8692         free(bits);
8693         free_chunk_cache_tree(&chunk_cache);
8694         free_device_cache_tree(&dev_cache);
8695         free_block_group_tree(&block_group_cache);
8696         free_device_extent_tree(&dev_extent_cache);
8697         free_extent_cache_tree(&seen);
8698         free_extent_cache_tree(&pending);
8699         free_extent_cache_tree(&reada);
8700         free_extent_cache_tree(&nodes);
8701         return ret;
8702 loop:
8703         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8704         free_extent_cache_tree(&seen);
8705         free_extent_cache_tree(&pending);
8706         free_extent_cache_tree(&reada);
8707         free_extent_cache_tree(&nodes);
8708         free_chunk_cache_tree(&chunk_cache);
8709         free_block_group_tree(&block_group_cache);
8710         free_device_cache_tree(&dev_cache);
8711         free_device_extent_tree(&dev_extent_cache);
8712         free_extent_record_cache(root->fs_info, &extent_cache);
8713         free_root_item_list(&normal_trees);
8714         free_root_item_list(&dropping_trees);
8715         extent_io_tree_cleanup(&excluded_extents);
8716         goto again;
8717 }
8718
8719 /*
8720  * Check backrefs of a tree block given by @bytenr or @eb.
8721  *
8722  * @root:       the root containing the @bytenr or @eb
8723  * @eb:         tree block extent buffer, can be NULL
8724  * @bytenr:     bytenr of the tree block to search
8725  * @level:      tree level of the tree block
8726  * @owner:      owner of the tree block
8727  *
8728  * Return >0 for any error found and output error message
8729  * Return 0 for no error found
8730  */
8731 static int check_tree_block_ref(struct btrfs_root *root,
8732                                 struct extent_buffer *eb, u64 bytenr,
8733                                 int level, u64 owner)
8734 {
8735         struct btrfs_key key;
8736         struct btrfs_root *extent_root = root->fs_info->extent_root;
8737         struct btrfs_path path;
8738         struct btrfs_extent_item *ei;
8739         struct btrfs_extent_inline_ref *iref;
8740         struct extent_buffer *leaf;
8741         unsigned long end;
8742         unsigned long ptr;
8743         int slot;
8744         int skinny_level;
8745         int type;
8746         u32 nodesize = root->nodesize;
8747         u32 item_size;
8748         u64 offset;
8749         int found_ref = 0;
8750         int err = 0;
8751         int ret;
8752
8753         btrfs_init_path(&path);
8754         key.objectid = bytenr;
8755         if (btrfs_fs_incompat(root->fs_info,
8756                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8757                 key.type = BTRFS_METADATA_ITEM_KEY;
8758         else
8759                 key.type = BTRFS_EXTENT_ITEM_KEY;
8760         key.offset = (u64)-1;
8761
8762         /* Search for the backref in extent tree */
8763         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8764         if (ret < 0) {
8765                 err |= BACKREF_MISSING;
8766                 goto out;
8767         }
8768         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8769         if (ret) {
8770                 err |= BACKREF_MISSING;
8771                 goto out;
8772         }
8773
8774         leaf = path.nodes[0];
8775         slot = path.slots[0];
8776         btrfs_item_key_to_cpu(leaf, &key, slot);
8777
8778         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8779
8780         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8781                 skinny_level = (int)key.offset;
8782                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8783         } else {
8784                 struct btrfs_tree_block_info *info;
8785
8786                 info = (struct btrfs_tree_block_info *)(ei + 1);
8787                 skinny_level = btrfs_tree_block_level(leaf, info);
8788                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8789         }
8790
8791         if (eb) {
8792                 u64 header_gen;
8793                 u64 extent_gen;
8794
8795                 if (!(btrfs_extent_flags(leaf, ei) &
8796                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8797                         error(
8798                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8799                                 key.objectid, nodesize,
8800                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8801                         err = BACKREF_MISMATCH;
8802                 }
8803                 header_gen = btrfs_header_generation(eb);
8804                 extent_gen = btrfs_extent_generation(leaf, ei);
8805                 if (header_gen != extent_gen) {
8806                         error(
8807         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8808                                 key.objectid, nodesize, header_gen,
8809                                 extent_gen);
8810                         err = BACKREF_MISMATCH;
8811                 }
8812                 if (level != skinny_level) {
8813                         error(
8814                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8815                                 key.objectid, nodesize, level, skinny_level);
8816                         err = BACKREF_MISMATCH;
8817                 }
8818                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8819                         error(
8820                         "extent[%llu %u] is referred by other roots than %llu",
8821                                 key.objectid, nodesize, root->objectid);
8822                         err = BACKREF_MISMATCH;
8823                 }
8824         }
8825
8826         /*
8827          * Iterate the extent/metadata item to find the exact backref
8828          */
8829         item_size = btrfs_item_size_nr(leaf, slot);
8830         ptr = (unsigned long)iref;
8831         end = (unsigned long)ei + item_size;
8832         while (ptr < end) {
8833                 iref = (struct btrfs_extent_inline_ref *)ptr;
8834                 type = btrfs_extent_inline_ref_type(leaf, iref);
8835                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8836
8837                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8838                         (offset == root->objectid || offset == owner)) {
8839                         found_ref = 1;
8840                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8841                         /* Check if the backref points to valid referencer */
8842                         found_ref = !check_tree_block_ref(root, NULL, offset,
8843                                                           level + 1, owner);
8844                 }
8845
8846                 if (found_ref)
8847                         break;
8848                 ptr += btrfs_extent_inline_ref_size(type);
8849         }
8850
8851         /*
8852          * Inlined extent item doesn't have what we need, check
8853          * TREE_BLOCK_REF_KEY
8854          */
8855         if (!found_ref) {
8856                 btrfs_release_path(&path);
8857                 key.objectid = bytenr;
8858                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8859                 key.offset = root->objectid;
8860
8861                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8862                 if (!ret)
8863                         found_ref = 1;
8864         }
8865         if (!found_ref)
8866                 err |= BACKREF_MISSING;
8867 out:
8868         btrfs_release_path(&path);
8869         if (eb && (err & BACKREF_MISSING))
8870                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8871                         bytenr, nodesize, owner, level);
8872         return err;
8873 }
8874
8875 /*
8876  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8877  *
8878  * Return >0 any error found and output error message
8879  * Return 0 for no error found
8880  */
8881 static int check_extent_data_item(struct btrfs_root *root,
8882                                   struct extent_buffer *eb, int slot)
8883 {
8884         struct btrfs_file_extent_item *fi;
8885         struct btrfs_path path;
8886         struct btrfs_root *extent_root = root->fs_info->extent_root;
8887         struct btrfs_key fi_key;
8888         struct btrfs_key dbref_key;
8889         struct extent_buffer *leaf;
8890         struct btrfs_extent_item *ei;
8891         struct btrfs_extent_inline_ref *iref;
8892         struct btrfs_extent_data_ref *dref;
8893         u64 owner;
8894         u64 file_extent_gen;
8895         u64 disk_bytenr;
8896         u64 disk_num_bytes;
8897         u64 extent_num_bytes;
8898         u64 extent_flags;
8899         u64 extent_gen;
8900         u32 item_size;
8901         unsigned long end;
8902         unsigned long ptr;
8903         int type;
8904         u64 ref_root;
8905         int found_dbackref = 0;
8906         int err = 0;
8907         int ret;
8908
8909         btrfs_item_key_to_cpu(eb, &fi_key, slot);
8910         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8911         file_extent_gen = btrfs_file_extent_generation(eb, fi);
8912
8913         /* Nothing to check for hole and inline data extents */
8914         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8915             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8916                 return 0;
8917
8918         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8919         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8920         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8921
8922         /* Check unaligned disk_num_bytes and num_bytes */
8923         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8924                 error(
8925 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8926                         fi_key.objectid, fi_key.offset, disk_num_bytes,
8927                         root->sectorsize);
8928                 err |= BYTES_UNALIGNED;
8929         } else {
8930                 data_bytes_allocated += disk_num_bytes;
8931         }
8932         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8933                 error(
8934 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8935                         fi_key.objectid, fi_key.offset, extent_num_bytes,
8936                         root->sectorsize);
8937                 err |= BYTES_UNALIGNED;
8938         } else {
8939                 data_bytes_referenced += extent_num_bytes;
8940         }
8941         owner = btrfs_header_owner(eb);
8942
8943         /* Check the extent item of the file extent in extent tree */
8944         btrfs_init_path(&path);
8945         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8946         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8947         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8948
8949         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8950         if (ret) {
8951                 err |= BACKREF_MISSING;
8952                 goto error;
8953         }
8954
8955         leaf = path.nodes[0];
8956         slot = path.slots[0];
8957         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8958
8959         extent_flags = btrfs_extent_flags(leaf, ei);
8960         extent_gen = btrfs_extent_generation(leaf, ei);
8961
8962         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8963                 error(
8964                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8965                     disk_bytenr, disk_num_bytes,
8966                     BTRFS_EXTENT_FLAG_DATA);
8967                 err |= BACKREF_MISMATCH;
8968         }
8969
8970         if (file_extent_gen < extent_gen) {
8971                 error(
8972 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8973                         disk_bytenr, disk_num_bytes, file_extent_gen,
8974                         extent_gen);
8975                 err |= BACKREF_MISMATCH;
8976         }
8977
8978         /* Check data backref inside that extent item */
8979         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8980         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8981         ptr = (unsigned long)iref;
8982         end = (unsigned long)ei + item_size;
8983         while (ptr < end) {
8984                 iref = (struct btrfs_extent_inline_ref *)ptr;
8985                 type = btrfs_extent_inline_ref_type(leaf, iref);
8986                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8987
8988                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8989                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
8990                         if (ref_root == owner || ref_root == root->objectid)
8991                                 found_dbackref = 1;
8992                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
8993                         found_dbackref = !check_tree_block_ref(root, NULL,
8994                                 btrfs_extent_inline_ref_offset(leaf, iref),
8995                                 0, owner);
8996                 }
8997
8998                 if (found_dbackref)
8999                         break;
9000                 ptr += btrfs_extent_inline_ref_size(type);
9001         }
9002
9003         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
9004         if (!found_dbackref) {
9005                 btrfs_release_path(&path);
9006
9007                 btrfs_init_path(&path);
9008                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9009                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
9010                 dbref_key.offset = hash_extent_data_ref(root->objectid,
9011                                 fi_key.objectid, fi_key.offset);
9012
9013                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
9014                                         &dbref_key, &path, 0, 0);
9015                 if (!ret)
9016                         found_dbackref = 1;
9017         }
9018
9019         if (!found_dbackref)
9020                 err |= BACKREF_MISSING;
9021 error:
9022         btrfs_release_path(&path);
9023         if (err & BACKREF_MISSING) {
9024                 error("data extent[%llu %llu] backref lost",
9025                       disk_bytenr, disk_num_bytes);
9026         }
9027         return err;
9028 }
9029
9030 /*
9031  * Get real tree block level for the case like shared block
9032  * Return >= 0 as tree level
9033  * Return <0 for error
9034  */
9035 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
9036 {
9037         struct extent_buffer *eb;
9038         struct btrfs_path path;
9039         struct btrfs_key key;
9040         struct btrfs_extent_item *ei;
9041         u64 flags;
9042         u64 transid;
9043         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9044         u8 backref_level;
9045         u8 header_level;
9046         int ret;
9047
9048         /* Search extent tree for extent generation and level */
9049         key.objectid = bytenr;
9050         key.type = BTRFS_METADATA_ITEM_KEY;
9051         key.offset = (u64)-1;
9052
9053         btrfs_init_path(&path);
9054         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
9055         if (ret < 0)
9056                 goto release_out;
9057         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
9058         if (ret < 0)
9059                 goto release_out;
9060         if (ret > 0) {
9061                 ret = -ENOENT;
9062                 goto release_out;
9063         }
9064
9065         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9066         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9067                             struct btrfs_extent_item);
9068         flags = btrfs_extent_flags(path.nodes[0], ei);
9069         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9070                 ret = -ENOENT;
9071                 goto release_out;
9072         }
9073
9074         /* Get transid for later read_tree_block() check */
9075         transid = btrfs_extent_generation(path.nodes[0], ei);
9076
9077         /* Get backref level as one source */
9078         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9079                 backref_level = key.offset;
9080         } else {
9081                 struct btrfs_tree_block_info *info;
9082
9083                 info = (struct btrfs_tree_block_info *)(ei + 1);
9084                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9085         }
9086         btrfs_release_path(&path);
9087
9088         /* Get level from tree block as an alternative source */
9089         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9090         if (!extent_buffer_uptodate(eb)) {
9091                 free_extent_buffer(eb);
9092                 return -EIO;
9093         }
9094         header_level = btrfs_header_level(eb);
9095         free_extent_buffer(eb);
9096
9097         if (header_level != backref_level)
9098                 return -EIO;
9099         return header_level;
9100
9101 release_out:
9102         btrfs_release_path(&path);
9103         return ret;
9104 }
9105
9106 /*
9107  * Check if a tree block backref is valid (points to a valid tree block)
9108  * if level == -1, level will be resolved
9109  * Return >0 for any error found and print error message
9110  */
9111 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9112                                     u64 bytenr, int level)
9113 {
9114         struct btrfs_root *root;
9115         struct btrfs_key key;
9116         struct btrfs_path path;
9117         struct extent_buffer *eb;
9118         struct extent_buffer *node;
9119         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9120         int err = 0;
9121         int ret;
9122
9123         /* Query level for level == -1 special case */
9124         if (level == -1)
9125                 level = query_tree_block_level(fs_info, bytenr);
9126         if (level < 0) {
9127                 err |= REFERENCER_MISSING;
9128                 goto out;
9129         }
9130
9131         key.objectid = root_id;
9132         key.type = BTRFS_ROOT_ITEM_KEY;
9133         key.offset = (u64)-1;
9134
9135         root = btrfs_read_fs_root(fs_info, &key);
9136         if (IS_ERR(root)) {
9137                 err |= REFERENCER_MISSING;
9138                 goto out;
9139         }
9140
9141         /* Read out the tree block to get item/node key */
9142         eb = read_tree_block(root, bytenr, root->nodesize, 0);
9143         if (!extent_buffer_uptodate(eb)) {
9144                 err |= REFERENCER_MISSING;
9145                 free_extent_buffer(eb);
9146                 goto out;
9147         }
9148
9149         /* Empty tree, no need to check key */
9150         if (!btrfs_header_nritems(eb) && !level) {
9151                 free_extent_buffer(eb);
9152                 goto out;
9153         }
9154
9155         if (level)
9156                 btrfs_node_key_to_cpu(eb, &key, 0);
9157         else
9158                 btrfs_item_key_to_cpu(eb, &key, 0);
9159
9160         free_extent_buffer(eb);
9161
9162         btrfs_init_path(&path);
9163         /* Search with the first key, to ensure we can reach it */
9164         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9165         if (ret) {
9166                 err |= REFERENCER_MISSING;
9167                 goto release_out;
9168         }
9169
9170         node = path.nodes[level];
9171         if (btrfs_header_bytenr(node) != bytenr) {
9172                 error(
9173         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9174                         bytenr, nodesize, bytenr,
9175                         btrfs_header_bytenr(node));
9176                 err |= REFERENCER_MISMATCH;
9177         }
9178         if (btrfs_header_level(node) != level) {
9179                 error(
9180         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9181                         bytenr, nodesize, level,
9182                         btrfs_header_level(node));
9183                 err |= REFERENCER_MISMATCH;
9184         }
9185
9186 release_out:
9187         btrfs_release_path(&path);
9188 out:
9189         if (err & REFERENCER_MISSING) {
9190                 if (level < 0)
9191                         error("extent [%llu %d] lost referencer (owner: %llu)",
9192                                 bytenr, nodesize, root_id);
9193                 else
9194                         error(
9195                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9196                                 bytenr, nodesize, root_id, level);
9197         }
9198
9199         return err;
9200 }
9201
9202 /*
9203  * Check referencer for shared block backref
9204  * If level == -1, this function will resolve the level.
9205  */
9206 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9207                                      u64 parent, u64 bytenr, int level)
9208 {
9209         struct extent_buffer *eb;
9210         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9211         u32 nr;
9212         int found_parent = 0;
9213         int i;
9214
9215         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9216         if (!extent_buffer_uptodate(eb))
9217                 goto out;
9218
9219         if (level == -1)
9220                 level = query_tree_block_level(fs_info, bytenr);
9221         if (level < 0)
9222                 goto out;
9223
9224         if (level + 1 != btrfs_header_level(eb))
9225                 goto out;
9226
9227         nr = btrfs_header_nritems(eb);
9228         for (i = 0; i < nr; i++) {
9229                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9230                         found_parent = 1;
9231                         break;
9232                 }
9233         }
9234 out:
9235         free_extent_buffer(eb);
9236         if (!found_parent) {
9237                 error(
9238         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9239                         bytenr, nodesize, parent, level);
9240                 return REFERENCER_MISSING;
9241         }
9242         return 0;
9243 }
9244
9245 /*
9246  * Check referencer for normal (inlined) data ref
9247  * If len == 0, it will be resolved by searching in extent tree
9248  */
9249 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9250                                      u64 root_id, u64 objectid, u64 offset,
9251                                      u64 bytenr, u64 len, u32 count)
9252 {
9253         struct btrfs_root *root;
9254         struct btrfs_root *extent_root = fs_info->extent_root;
9255         struct btrfs_key key;
9256         struct btrfs_path path;
9257         struct extent_buffer *leaf;
9258         struct btrfs_file_extent_item *fi;
9259         u32 found_count = 0;
9260         int slot;
9261         int ret = 0;
9262
9263         if (!len) {
9264                 key.objectid = bytenr;
9265                 key.type = BTRFS_EXTENT_ITEM_KEY;
9266                 key.offset = (u64)-1;
9267
9268                 btrfs_init_path(&path);
9269                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9270                 if (ret < 0)
9271                         goto out;
9272                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9273                 if (ret)
9274                         goto out;
9275                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9276                 if (key.objectid != bytenr ||
9277                     key.type != BTRFS_EXTENT_ITEM_KEY)
9278                         goto out;
9279                 len = key.offset;
9280                 btrfs_release_path(&path);
9281         }
9282         key.objectid = root_id;
9283         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
9284         key.offset = (u64)-1;
9285         btrfs_init_path(&path);
9286
9287         root = btrfs_read_fs_root(fs_info, &key);
9288         if (IS_ERR(root))
9289                 goto out;
9290
9291         key.objectid = objectid;
9292         key.type = BTRFS_EXTENT_DATA_KEY;
9293         /*
9294          * It can be nasty as data backref offset is
9295          * file offset - file extent offset, which is smaller or
9296          * equal to original backref offset.  The only special case is
9297          * overflow.  So we need to special check and do further search.
9298          */
9299         key.offset = offset & (1ULL << 63) ? 0 : offset;
9300
9301         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9302         if (ret < 0)
9303                 goto out;
9304
9305         /*
9306          * Search afterwards to get correct one
9307          * NOTE: As we must do a comprehensive check on the data backref to
9308          * make sure the dref count also matches, we must iterate all file
9309          * extents for that inode.
9310          */
9311         while (1) {
9312                 leaf = path.nodes[0];
9313                 slot = path.slots[0];
9314
9315                 btrfs_item_key_to_cpu(leaf, &key, slot);
9316                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9317                         break;
9318                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9319                 /*
9320                  * Except normal disk bytenr and disk num bytes, we still
9321                  * need to do extra check on dbackref offset as
9322                  * dbackref offset = file_offset - file_extent_offset
9323                  */
9324                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9325                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9326                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9327                     offset)
9328                         found_count++;
9329
9330                 ret = btrfs_next_item(root, &path);
9331                 if (ret)
9332                         break;
9333         }
9334 out:
9335         btrfs_release_path(&path);
9336         if (found_count != count) {
9337                 error(
9338 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9339                         bytenr, len, root_id, objectid, offset, count, found_count);
9340                 return REFERENCER_MISSING;
9341         }
9342         return 0;
9343 }
9344
9345 /*
9346  * Check if the referencer of a shared data backref exists
9347  */
9348 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9349                                      u64 parent, u64 bytenr)
9350 {
9351         struct extent_buffer *eb;
9352         struct btrfs_key key;
9353         struct btrfs_file_extent_item *fi;
9354         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9355         u32 nr;
9356         int found_parent = 0;
9357         int i;
9358
9359         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9360         if (!extent_buffer_uptodate(eb))
9361                 goto out;
9362
9363         nr = btrfs_header_nritems(eb);
9364         for (i = 0; i < nr; i++) {
9365                 btrfs_item_key_to_cpu(eb, &key, i);
9366                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9367                         continue;
9368
9369                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9370                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9371                         continue;
9372
9373                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9374                         found_parent = 1;
9375                         break;
9376                 }
9377         }
9378
9379 out:
9380         free_extent_buffer(eb);
9381         if (!found_parent) {
9382                 error("shared extent %llu referencer lost (parent: %llu)",
9383                         bytenr, parent);
9384                 return REFERENCER_MISSING;
9385         }
9386         return 0;
9387 }
9388
9389 /*
9390  * This function will check a given extent item, including its backref and
9391  * itself (like crossing stripe boundary and type)
9392  *
9393  * Since we don't use extent_record anymore, introduce new error bit
9394  */
9395 static int check_extent_item(struct btrfs_fs_info *fs_info,
9396                              struct extent_buffer *eb, int slot)
9397 {
9398         struct btrfs_extent_item *ei;
9399         struct btrfs_extent_inline_ref *iref;
9400         struct btrfs_extent_data_ref *dref;
9401         unsigned long end;
9402         unsigned long ptr;
9403         int type;
9404         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9405         u32 item_size = btrfs_item_size_nr(eb, slot);
9406         u64 flags;
9407         u64 offset;
9408         int metadata = 0;
9409         int level;
9410         struct btrfs_key key;
9411         int ret;
9412         int err = 0;
9413
9414         btrfs_item_key_to_cpu(eb, &key, slot);
9415         if (key.type == BTRFS_EXTENT_ITEM_KEY)
9416                 bytes_used += key.offset;
9417         else
9418                 bytes_used += nodesize;
9419
9420         if (item_size < sizeof(*ei)) {
9421                 /*
9422                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9423                  * old thing when on disk format is still un-determined.
9424                  * No need to care about it anymore
9425                  */
9426                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9427                 return -ENOTTY;
9428         }
9429
9430         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9431         flags = btrfs_extent_flags(eb, ei);
9432
9433         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9434                 metadata = 1;
9435         if (metadata && check_crossing_stripes(key.objectid, eb->len)) {
9436                 error("bad metadata [%llu, %llu) crossing stripe boundary",
9437                       key.objectid, key.objectid + nodesize);
9438                 err |= CROSSING_STRIPE_BOUNDARY;
9439         }
9440
9441         ptr = (unsigned long)(ei + 1);
9442
9443         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9444                 /* Old EXTENT_ITEM metadata */
9445                 struct btrfs_tree_block_info *info;
9446
9447                 info = (struct btrfs_tree_block_info *)ptr;
9448                 level = btrfs_tree_block_level(eb, info);
9449                 ptr += sizeof(struct btrfs_tree_block_info);
9450         } else {
9451                 /* New METADATA_ITEM */
9452                 level = key.offset;
9453         }
9454         end = (unsigned long)ei + item_size;
9455
9456         if (ptr >= end) {
9457                 err |= ITEM_SIZE_MISMATCH;
9458                 goto out;
9459         }
9460
9461         /* Now check every backref in this extent item */
9462 next:
9463         iref = (struct btrfs_extent_inline_ref *)ptr;
9464         type = btrfs_extent_inline_ref_type(eb, iref);
9465         offset = btrfs_extent_inline_ref_offset(eb, iref);
9466         switch (type) {
9467         case BTRFS_TREE_BLOCK_REF_KEY:
9468                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9469                                                level);
9470                 err |= ret;
9471                 break;
9472         case BTRFS_SHARED_BLOCK_REF_KEY:
9473                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9474                                                  level);
9475                 err |= ret;
9476                 break;
9477         case BTRFS_EXTENT_DATA_REF_KEY:
9478                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9479                 ret = check_extent_data_backref(fs_info,
9480                                 btrfs_extent_data_ref_root(eb, dref),
9481                                 btrfs_extent_data_ref_objectid(eb, dref),
9482                                 btrfs_extent_data_ref_offset(eb, dref),
9483                                 key.objectid, key.offset,
9484                                 btrfs_extent_data_ref_count(eb, dref));
9485                 err |= ret;
9486                 break;
9487         case BTRFS_SHARED_DATA_REF_KEY:
9488                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9489                 err |= ret;
9490                 break;
9491         default:
9492                 error("extent[%llu %d %llu] has unknown ref type: %d",
9493                         key.objectid, key.type, key.offset, type);
9494                 err |= UNKNOWN_TYPE;
9495                 goto out;
9496         }
9497
9498         ptr += btrfs_extent_inline_ref_size(type);
9499         if (ptr < end)
9500                 goto next;
9501
9502 out:
9503         return err;
9504 }
9505
9506 /*
9507  * Check if a dev extent item is referred correctly by its chunk
9508  */
9509 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9510                                  struct extent_buffer *eb, int slot)
9511 {
9512         struct btrfs_root *chunk_root = fs_info->chunk_root;
9513         struct btrfs_dev_extent *ptr;
9514         struct btrfs_path path;
9515         struct btrfs_key chunk_key;
9516         struct btrfs_key devext_key;
9517         struct btrfs_chunk *chunk;
9518         struct extent_buffer *l;
9519         int num_stripes;
9520         u64 length;
9521         int i;
9522         int found_chunk = 0;
9523         int ret;
9524
9525         btrfs_item_key_to_cpu(eb, &devext_key, slot);
9526         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9527         length = btrfs_dev_extent_length(eb, ptr);
9528
9529         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9530         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9531         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9532
9533         btrfs_init_path(&path);
9534         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9535         if (ret)
9536                 goto out;
9537
9538         l = path.nodes[0];
9539         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9540         if (btrfs_chunk_length(l, chunk) != length)
9541                 goto out;
9542
9543         num_stripes = btrfs_chunk_num_stripes(l, chunk);
9544         for (i = 0; i < num_stripes; i++) {
9545                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9546                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9547
9548                 if (devid == devext_key.objectid &&
9549                     offset == devext_key.offset) {
9550                         found_chunk = 1;
9551                         break;
9552                 }
9553         }
9554 out:
9555         btrfs_release_path(&path);
9556         if (!found_chunk) {
9557                 error(
9558                 "device extent[%llu, %llu, %llu] did not find the related chunk",
9559                         devext_key.objectid, devext_key.offset, length);
9560                 return REFERENCER_MISSING;
9561         }
9562         return 0;
9563 }
9564
9565 /*
9566  * Check if the used space is correct with the dev item
9567  */
9568 static int check_dev_item(struct btrfs_fs_info *fs_info,
9569                           struct extent_buffer *eb, int slot)
9570 {
9571         struct btrfs_root *dev_root = fs_info->dev_root;
9572         struct btrfs_dev_item *dev_item;
9573         struct btrfs_path path;
9574         struct btrfs_key key;
9575         struct btrfs_dev_extent *ptr;
9576         u64 dev_id;
9577         u64 used;
9578         u64 total = 0;
9579         int ret;
9580
9581         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9582         dev_id = btrfs_device_id(eb, dev_item);
9583         used = btrfs_device_bytes_used(eb, dev_item);
9584
9585         key.objectid = dev_id;
9586         key.type = BTRFS_DEV_EXTENT_KEY;
9587         key.offset = 0;
9588
9589         btrfs_init_path(&path);
9590         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9591         if (ret < 0) {
9592                 btrfs_item_key_to_cpu(eb, &key, slot);
9593                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9594                         key.objectid, key.type, key.offset);
9595                 btrfs_release_path(&path);
9596                 return REFERENCER_MISSING;
9597         }
9598
9599         /* Iterate dev_extents to calculate the used space of a device */
9600         while (1) {
9601                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9602
9603                 if (key.objectid > dev_id)
9604                         break;
9605                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9606                         goto next;
9607
9608                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9609                                      struct btrfs_dev_extent);
9610                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9611 next:
9612                 ret = btrfs_next_item(dev_root, &path);
9613                 if (ret)
9614                         break;
9615         }
9616         btrfs_release_path(&path);
9617
9618         if (used != total) {
9619                 btrfs_item_key_to_cpu(eb, &key, slot);
9620                 error(
9621 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9622                         total, used, BTRFS_ROOT_TREE_OBJECTID,
9623                         BTRFS_DEV_EXTENT_KEY, dev_id);
9624                 return ACCOUNTING_MISMATCH;
9625         }
9626         return 0;
9627 }
9628
9629 /*
9630  * Check a block group item with its referener (chunk) and its used space
9631  * with extent/metadata item
9632  */
9633 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9634                                   struct extent_buffer *eb, int slot)
9635 {
9636         struct btrfs_root *extent_root = fs_info->extent_root;
9637         struct btrfs_root *chunk_root = fs_info->chunk_root;
9638         struct btrfs_block_group_item *bi;
9639         struct btrfs_block_group_item bg_item;
9640         struct btrfs_path path;
9641         struct btrfs_key bg_key;
9642         struct btrfs_key chunk_key;
9643         struct btrfs_key extent_key;
9644         struct btrfs_chunk *chunk;
9645         struct extent_buffer *leaf;
9646         struct btrfs_extent_item *ei;
9647         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9648         u64 flags;
9649         u64 bg_flags;
9650         u64 used;
9651         u64 total = 0;
9652         int ret;
9653         int err = 0;
9654
9655         btrfs_item_key_to_cpu(eb, &bg_key, slot);
9656         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9657         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9658         used = btrfs_block_group_used(&bg_item);
9659         bg_flags = btrfs_block_group_flags(&bg_item);
9660
9661         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9662         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9663         chunk_key.offset = bg_key.objectid;
9664
9665         btrfs_init_path(&path);
9666         /* Search for the referencer chunk */
9667         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9668         if (ret) {
9669                 error(
9670                 "block group[%llu %llu] did not find the related chunk item",
9671                         bg_key.objectid, bg_key.offset);
9672                 err |= REFERENCER_MISSING;
9673         } else {
9674                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9675                                         struct btrfs_chunk);
9676                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9677                                                 bg_key.offset) {
9678                         error(
9679         "block group[%llu %llu] related chunk item length does not match",
9680                                 bg_key.objectid, bg_key.offset);
9681                         err |= REFERENCER_MISMATCH;
9682                 }
9683         }
9684         btrfs_release_path(&path);
9685
9686         /* Search from the block group bytenr */
9687         extent_key.objectid = bg_key.objectid;
9688         extent_key.type = 0;
9689         extent_key.offset = 0;
9690
9691         btrfs_init_path(&path);
9692         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9693         if (ret < 0)
9694                 goto out;
9695
9696         /* Iterate extent tree to account used space */
9697         while (1) {
9698                 leaf = path.nodes[0];
9699                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
9700                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9701                         break;
9702
9703                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9704                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9705                         goto next;
9706                 if (extent_key.objectid < bg_key.objectid)
9707                         goto next;
9708
9709                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9710                         total += nodesize;
9711                 else
9712                         total += extent_key.offset;
9713
9714                 ei = btrfs_item_ptr(leaf, path.slots[0],
9715                                     struct btrfs_extent_item);
9716                 flags = btrfs_extent_flags(leaf, ei);
9717                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9718                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9719                                 error(
9720                         "bad extent[%llu, %llu) type mismatch with chunk",
9721                                         extent_key.objectid,
9722                                         extent_key.objectid + extent_key.offset);
9723                                 err |= CHUNK_TYPE_MISMATCH;
9724                         }
9725                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9726                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9727                                     BTRFS_BLOCK_GROUP_METADATA))) {
9728                                 error(
9729                         "bad extent[%llu, %llu) type mismatch with chunk",
9730                                         extent_key.objectid,
9731                                         extent_key.objectid + nodesize);
9732                                 err |= CHUNK_TYPE_MISMATCH;
9733                         }
9734                 }
9735 next:
9736                 ret = btrfs_next_item(extent_root, &path);
9737                 if (ret)
9738                         break;
9739         }
9740
9741 out:
9742         btrfs_release_path(&path);
9743
9744         if (total != used) {
9745                 error(
9746                 "block group[%llu %llu] used %llu but extent items used %llu",
9747                         bg_key.objectid, bg_key.offset, used, total);
9748                 err |= ACCOUNTING_MISMATCH;
9749         }
9750         return err;
9751 }
9752
9753 /*
9754  * Check a chunk item.
9755  * Including checking all referred dev_extents and block group
9756  */
9757 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9758                             struct extent_buffer *eb, int slot)
9759 {
9760         struct btrfs_root *extent_root = fs_info->extent_root;
9761         struct btrfs_root *dev_root = fs_info->dev_root;
9762         struct btrfs_path path;
9763         struct btrfs_key chunk_key;
9764         struct btrfs_key bg_key;
9765         struct btrfs_key devext_key;
9766         struct btrfs_chunk *chunk;
9767         struct extent_buffer *leaf;
9768         struct btrfs_block_group_item *bi;
9769         struct btrfs_block_group_item bg_item;
9770         struct btrfs_dev_extent *ptr;
9771         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9772         u64 length;
9773         u64 chunk_end;
9774         u64 type;
9775         u64 profile;
9776         int num_stripes;
9777         u64 offset;
9778         u64 objectid;
9779         int i;
9780         int ret;
9781         int err = 0;
9782
9783         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9784         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9785         length = btrfs_chunk_length(eb, chunk);
9786         chunk_end = chunk_key.offset + length;
9787         if (!IS_ALIGNED(length, sectorsize)) {
9788                 error("chunk[%llu %llu) not aligned to %u",
9789                         chunk_key.offset, chunk_end, sectorsize);
9790                 err |= BYTES_UNALIGNED;
9791                 goto out;
9792         }
9793
9794         type = btrfs_chunk_type(eb, chunk);
9795         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9796         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9797                 error("chunk[%llu %llu) has no chunk type",
9798                         chunk_key.offset, chunk_end);
9799                 err |= UNKNOWN_TYPE;
9800         }
9801         if (profile && (profile & (profile - 1))) {
9802                 error("chunk[%llu %llu) multiple profiles detected: %llx",
9803                         chunk_key.offset, chunk_end, profile);
9804                 err |= UNKNOWN_TYPE;
9805         }
9806
9807         bg_key.objectid = chunk_key.offset;
9808         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9809         bg_key.offset = length;
9810
9811         btrfs_init_path(&path);
9812         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9813         if (ret) {
9814                 error(
9815                 "chunk[%llu %llu) did not find the related block group item",
9816                         chunk_key.offset, chunk_end);
9817                 err |= REFERENCER_MISSING;
9818         } else{
9819                 leaf = path.nodes[0];
9820                 bi = btrfs_item_ptr(leaf, path.slots[0],
9821                                     struct btrfs_block_group_item);
9822                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9823                                    sizeof(bg_item));
9824                 if (btrfs_block_group_flags(&bg_item) != type) {
9825                         error(
9826 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9827                                 chunk_key.offset, chunk_end, type,
9828                                 btrfs_block_group_flags(&bg_item));
9829                         err |= REFERENCER_MISSING;
9830                 }
9831         }
9832
9833         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9834         for (i = 0; i < num_stripes; i++) {
9835                 btrfs_release_path(&path);
9836                 btrfs_init_path(&path);
9837                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9838                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9839                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9840
9841                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9842                                         0, 0);
9843                 if (ret)
9844                         goto not_match_dev;
9845
9846                 leaf = path.nodes[0];
9847                 ptr = btrfs_item_ptr(leaf, path.slots[0],
9848                                      struct btrfs_dev_extent);
9849                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9850                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9851                 if (objectid != chunk_key.objectid ||
9852                     offset != chunk_key.offset ||
9853                     btrfs_dev_extent_length(leaf, ptr) != length)
9854                         goto not_match_dev;
9855                 continue;
9856 not_match_dev:
9857                 err |= BACKREF_MISSING;
9858                 error(
9859                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9860                         chunk_key.objectid, chunk_end, i);
9861                 continue;
9862         }
9863         btrfs_release_path(&path);
9864 out:
9865         return err;
9866 }
9867
9868 /*
9869  * Main entry function to check known items and update related accounting info
9870  */
9871 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9872 {
9873         struct btrfs_fs_info *fs_info = root->fs_info;
9874         struct btrfs_key key;
9875         int slot = 0;
9876         int type;
9877         struct btrfs_extent_data_ref *dref;
9878         int ret;
9879         int err = 0;
9880
9881 next:
9882         btrfs_item_key_to_cpu(eb, &key, slot);
9883         type = btrfs_key_type(&key);
9884
9885         switch (type) {
9886         case BTRFS_EXTENT_DATA_KEY:
9887                 ret = check_extent_data_item(root, eb, slot);
9888                 err |= ret;
9889                 break;
9890         case BTRFS_BLOCK_GROUP_ITEM_KEY:
9891                 ret = check_block_group_item(fs_info, eb, slot);
9892                 err |= ret;
9893                 break;
9894         case BTRFS_DEV_ITEM_KEY:
9895                 ret = check_dev_item(fs_info, eb, slot);
9896                 err |= ret;
9897                 break;
9898         case BTRFS_CHUNK_ITEM_KEY:
9899                 ret = check_chunk_item(fs_info, eb, slot);
9900                 err |= ret;
9901                 break;
9902         case BTRFS_DEV_EXTENT_KEY:
9903                 ret = check_dev_extent_item(fs_info, eb, slot);
9904                 err |= ret;
9905                 break;
9906         case BTRFS_EXTENT_ITEM_KEY:
9907         case BTRFS_METADATA_ITEM_KEY:
9908                 ret = check_extent_item(fs_info, eb, slot);
9909                 err |= ret;
9910                 break;
9911         case BTRFS_EXTENT_CSUM_KEY:
9912                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9913                 break;
9914         case BTRFS_TREE_BLOCK_REF_KEY:
9915                 ret = check_tree_block_backref(fs_info, key.offset,
9916                                                key.objectid, -1);
9917                 err |= ret;
9918                 break;
9919         case BTRFS_EXTENT_DATA_REF_KEY:
9920                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9921                 ret = check_extent_data_backref(fs_info,
9922                                 btrfs_extent_data_ref_root(eb, dref),
9923                                 btrfs_extent_data_ref_objectid(eb, dref),
9924                                 btrfs_extent_data_ref_offset(eb, dref),
9925                                 key.objectid, 0,
9926                                 btrfs_extent_data_ref_count(eb, dref));
9927                 err |= ret;
9928                 break;
9929         case BTRFS_SHARED_BLOCK_REF_KEY:
9930                 ret = check_shared_block_backref(fs_info, key.offset,
9931                                                  key.objectid, -1);
9932                 err |= ret;
9933                 break;
9934         case BTRFS_SHARED_DATA_REF_KEY:
9935                 ret = check_shared_data_backref(fs_info, key.offset,
9936                                                 key.objectid);
9937                 err |= ret;
9938                 break;
9939         default:
9940                 break;
9941         }
9942
9943         if (++slot < btrfs_header_nritems(eb))
9944                 goto next;
9945
9946         return err;
9947 }
9948
9949 /*
9950  * Helper function for later fs/subvol tree check.  To determine if a tree
9951  * block should be checked.
9952  * This function will ensure only the direct referencer with lowest rootid to
9953  * check a fs/subvolume tree block.
9954  *
9955  * Backref check at extent tree would detect errors like missing subvolume
9956  * tree, so we can do aggressive check to reduce duplicated checks.
9957  */
9958 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9959 {
9960         struct btrfs_root *extent_root = root->fs_info->extent_root;
9961         struct btrfs_key key;
9962         struct btrfs_path path;
9963         struct extent_buffer *leaf;
9964         int slot;
9965         struct btrfs_extent_item *ei;
9966         unsigned long ptr;
9967         unsigned long end;
9968         int type;
9969         u32 item_size;
9970         u64 offset;
9971         struct btrfs_extent_inline_ref *iref;
9972         int ret;
9973
9974         btrfs_init_path(&path);
9975         key.objectid = btrfs_header_bytenr(eb);
9976         key.type = BTRFS_METADATA_ITEM_KEY;
9977         key.offset = (u64)-1;
9978
9979         /*
9980          * Any failure in backref resolving means we can't determine
9981          * whom the tree block belongs to.
9982          * So in that case, we need to check that tree block
9983          */
9984         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9985         if (ret < 0)
9986                 goto need_check;
9987
9988         ret = btrfs_previous_extent_item(extent_root, &path,
9989                                          btrfs_header_bytenr(eb));
9990         if (ret)
9991                 goto need_check;
9992
9993         leaf = path.nodes[0];
9994         slot = path.slots[0];
9995         btrfs_item_key_to_cpu(leaf, &key, slot);
9996         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9997
9998         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9999                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10000         } else {
10001                 struct btrfs_tree_block_info *info;
10002
10003                 info = (struct btrfs_tree_block_info *)(ei + 1);
10004                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10005         }
10006
10007         item_size = btrfs_item_size_nr(leaf, slot);
10008         ptr = (unsigned long)iref;
10009         end = (unsigned long)ei + item_size;
10010         while (ptr < end) {
10011                 iref = (struct btrfs_extent_inline_ref *)ptr;
10012                 type = btrfs_extent_inline_ref_type(leaf, iref);
10013                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10014
10015                 /*
10016                  * We only check the tree block if current root is
10017                  * the lowest referencer of it.
10018                  */
10019                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10020                     offset < root->objectid) {
10021                         btrfs_release_path(&path);
10022                         return 0;
10023                 }
10024
10025                 ptr += btrfs_extent_inline_ref_size(type);
10026         }
10027         /*
10028          * Normally we should also check keyed tree block ref, but that may be
10029          * very time consuming.  Inlined ref should already make us skip a lot
10030          * of refs now.  So skip search keyed tree block ref.
10031          */
10032
10033 need_check:
10034         btrfs_release_path(&path);
10035         return 1;
10036 }
10037
10038 /*
10039  * Traversal function for tree block. We will do:
10040  * 1) Skip shared fs/subvolume tree blocks
10041  * 2) Update related bytes accounting
10042  * 3) Pre-order traversal
10043  */
10044 static int traverse_tree_block(struct btrfs_root *root,
10045                                 struct extent_buffer *node)
10046 {
10047         struct extent_buffer *eb;
10048         int level;
10049         u64 nr;
10050         int i;
10051         int err = 0;
10052         int ret;
10053
10054         /*
10055          * Skip shared fs/subvolume tree block, in that case they will
10056          * be checked by referencer with lowest rootid
10057          */
10058         if (is_fstree(root->objectid) && !should_check(root, node))
10059                 return 0;
10060
10061         /* Update bytes accounting */
10062         total_btree_bytes += node->len;
10063         if (fs_root_objectid(btrfs_header_owner(node)))
10064                 total_fs_tree_bytes += node->len;
10065         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
10066                 total_extent_tree_bytes += node->len;
10067         if (!found_old_backref &&
10068             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
10069             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
10070             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
10071                 found_old_backref = 1;
10072
10073         /* pre-order tranversal, check itself first */
10074         level = btrfs_header_level(node);
10075         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10076                                    btrfs_header_level(node),
10077                                    btrfs_header_owner(node));
10078         err |= ret;
10079         if (err)
10080                 error(
10081         "check %s failed root %llu bytenr %llu level %d, force continue check",
10082                         level ? "node":"leaf", root->objectid,
10083                         btrfs_header_bytenr(node), btrfs_header_level(node));
10084
10085         if (!level) {
10086                 btree_space_waste += btrfs_leaf_free_space(root, node);
10087                 ret = check_leaf_items(root, node);
10088                 err |= ret;
10089                 return err;
10090         }
10091
10092         nr = btrfs_header_nritems(node);
10093         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10094                 sizeof(struct btrfs_key_ptr);
10095
10096         /* Then check all its children */
10097         for (i = 0; i < nr; i++) {
10098                 u64 blocknr = btrfs_node_blockptr(node, i);
10099
10100                 /*
10101                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10102                  * to call the function itself.
10103                  */
10104                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10105                 if (extent_buffer_uptodate(eb)) {
10106                         ret = traverse_tree_block(root, eb);
10107                         err |= ret;
10108                 }
10109                 free_extent_buffer(eb);
10110         }
10111
10112         return err;
10113 }
10114
10115 /*
10116  * Low memory usage version check_chunks_and_extents.
10117  */
10118 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10119 {
10120         struct btrfs_path path;
10121         struct btrfs_key key;
10122         struct btrfs_root *root1;
10123         struct btrfs_root *cur_root;
10124         int err = 0;
10125         int ret;
10126
10127         root1 = root->fs_info->chunk_root;
10128         ret = traverse_tree_block(root1, root1->node);
10129         err |= ret;
10130
10131         root1 = root->fs_info->tree_root;
10132         ret = traverse_tree_block(root1, root1->node);
10133         err |= ret;
10134
10135         btrfs_init_path(&path);
10136         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10137         key.offset = 0;
10138         key.type = BTRFS_ROOT_ITEM_KEY;
10139
10140         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10141         if (ret) {
10142                 error("cannot find extent treet in tree_root");
10143                 goto out;
10144         }
10145
10146         while (1) {
10147                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10148                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10149                         goto next;
10150                 key.offset = (u64)-1;
10151
10152                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10153                 if (IS_ERR(cur_root) || !cur_root) {
10154                         error("failed to read tree: %lld", key.objectid);
10155                         goto next;
10156                 }
10157
10158                 ret = traverse_tree_block(cur_root, cur_root->node);
10159                 err |= ret;
10160
10161 next:
10162                 ret = btrfs_next_item(root1, &path);
10163                 if (ret)
10164                         goto out;
10165         }
10166
10167 out:
10168         btrfs_release_path(&path);
10169         return err;
10170 }
10171
10172 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10173                            struct btrfs_root *root, int overwrite)
10174 {
10175         struct extent_buffer *c;
10176         struct extent_buffer *old = root->node;
10177         int level;
10178         int ret;
10179         struct btrfs_disk_key disk_key = {0,0,0};
10180
10181         level = 0;
10182
10183         if (overwrite) {
10184                 c = old;
10185                 extent_buffer_get(c);
10186                 goto init;
10187         }
10188         c = btrfs_alloc_free_block(trans, root,
10189                                    root->nodesize,
10190                                    root->root_key.objectid,
10191                                    &disk_key, level, 0, 0);
10192         if (IS_ERR(c)) {
10193                 c = old;
10194                 extent_buffer_get(c);
10195                 overwrite = 1;
10196         }
10197 init:
10198         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10199         btrfs_set_header_level(c, level);
10200         btrfs_set_header_bytenr(c, c->start);
10201         btrfs_set_header_generation(c, trans->transid);
10202         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10203         btrfs_set_header_owner(c, root->root_key.objectid);
10204
10205         write_extent_buffer(c, root->fs_info->fsid,
10206                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
10207
10208         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10209                             btrfs_header_chunk_tree_uuid(c),
10210                             BTRFS_UUID_SIZE);
10211
10212         btrfs_mark_buffer_dirty(c);
10213         /*
10214          * this case can happen in the following case:
10215          *
10216          * 1.overwrite previous root.
10217          *
10218          * 2.reinit reloc data root, this is because we skip pin
10219          * down reloc data tree before which means we can allocate
10220          * same block bytenr here.
10221          */
10222         if (old->start == c->start) {
10223                 btrfs_set_root_generation(&root->root_item,
10224                                           trans->transid);
10225                 root->root_item.level = btrfs_header_level(root->node);
10226                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10227                                         &root->root_key, &root->root_item);
10228                 if (ret) {
10229                         free_extent_buffer(c);
10230                         return ret;
10231                 }
10232         }
10233         free_extent_buffer(old);
10234         root->node = c;
10235         add_root_to_dirty_list(root);
10236         return 0;
10237 }
10238
10239 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10240                                 struct extent_buffer *eb, int tree_root)
10241 {
10242         struct extent_buffer *tmp;
10243         struct btrfs_root_item *ri;
10244         struct btrfs_key key;
10245         u64 bytenr;
10246         u32 nodesize;
10247         int level = btrfs_header_level(eb);
10248         int nritems;
10249         int ret;
10250         int i;
10251
10252         /*
10253          * If we have pinned this block before, don't pin it again.
10254          * This can not only avoid forever loop with broken filesystem
10255          * but also give us some speedups.
10256          */
10257         if (test_range_bit(&fs_info->pinned_extents, eb->start,
10258                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10259                 return 0;
10260
10261         btrfs_pin_extent(fs_info, eb->start, eb->len);
10262
10263         nodesize = btrfs_super_nodesize(fs_info->super_copy);
10264         nritems = btrfs_header_nritems(eb);
10265         for (i = 0; i < nritems; i++) {
10266                 if (level == 0) {
10267                         btrfs_item_key_to_cpu(eb, &key, i);
10268                         if (key.type != BTRFS_ROOT_ITEM_KEY)
10269                                 continue;
10270                         /* Skip the extent root and reloc roots */
10271                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10272                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10273                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10274                                 continue;
10275                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10276                         bytenr = btrfs_disk_root_bytenr(eb, ri);
10277
10278                         /*
10279                          * If at any point we start needing the real root we
10280                          * will have to build a stump root for the root we are
10281                          * in, but for now this doesn't actually use the root so
10282                          * just pass in extent_root.
10283                          */
10284                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10285                                               nodesize, 0);
10286                         if (!extent_buffer_uptodate(tmp)) {
10287                                 fprintf(stderr, "Error reading root block\n");
10288                                 return -EIO;
10289                         }
10290                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
10291                         free_extent_buffer(tmp);
10292                         if (ret)
10293                                 return ret;
10294                 } else {
10295                         bytenr = btrfs_node_blockptr(eb, i);
10296
10297                         /* If we aren't the tree root don't read the block */
10298                         if (level == 1 && !tree_root) {
10299                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
10300                                 continue;
10301                         }
10302
10303                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10304                                               nodesize, 0);
10305                         if (!extent_buffer_uptodate(tmp)) {
10306                                 fprintf(stderr, "Error reading tree block\n");
10307                                 return -EIO;
10308                         }
10309                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10310                         free_extent_buffer(tmp);
10311                         if (ret)
10312                                 return ret;
10313                 }
10314         }
10315
10316         return 0;
10317 }
10318
10319 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10320 {
10321         int ret;
10322
10323         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10324         if (ret)
10325                 return ret;
10326
10327         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
10328 }
10329
10330 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10331 {
10332         struct btrfs_block_group_cache *cache;
10333         struct btrfs_path *path;
10334         struct extent_buffer *leaf;
10335         struct btrfs_chunk *chunk;
10336         struct btrfs_key key;
10337         int ret;
10338         u64 start;
10339
10340         path = btrfs_alloc_path();
10341         if (!path)
10342                 return -ENOMEM;
10343
10344         key.objectid = 0;
10345         key.type = BTRFS_CHUNK_ITEM_KEY;
10346         key.offset = 0;
10347
10348         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10349         if (ret < 0) {
10350                 btrfs_free_path(path);
10351                 return ret;
10352         }
10353
10354         /*
10355          * We do this in case the block groups were screwed up and had alloc
10356          * bits that aren't actually set on the chunks.  This happens with
10357          * restored images every time and could happen in real life I guess.
10358          */
10359         fs_info->avail_data_alloc_bits = 0;
10360         fs_info->avail_metadata_alloc_bits = 0;
10361         fs_info->avail_system_alloc_bits = 0;
10362
10363         /* First we need to create the in-memory block groups */
10364         while (1) {
10365                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10366                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
10367                         if (ret < 0) {
10368                                 btrfs_free_path(path);
10369                                 return ret;
10370                         }
10371                         if (ret) {
10372                                 ret = 0;
10373                                 break;
10374                         }
10375                 }
10376                 leaf = path->nodes[0];
10377                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10378                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10379                         path->slots[0]++;
10380                         continue;
10381                 }
10382
10383                 chunk = btrfs_item_ptr(leaf, path->slots[0],
10384                                        struct btrfs_chunk);
10385                 btrfs_add_block_group(fs_info, 0,
10386                                       btrfs_chunk_type(leaf, chunk),
10387                                       key.objectid, key.offset,
10388                                       btrfs_chunk_length(leaf, chunk));
10389                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10390                                  key.offset + btrfs_chunk_length(leaf, chunk),
10391                                  GFP_NOFS);
10392                 path->slots[0]++;
10393         }
10394         start = 0;
10395         while (1) {
10396                 cache = btrfs_lookup_first_block_group(fs_info, start);
10397                 if (!cache)
10398                         break;
10399                 cache->cached = 1;
10400                 start = cache->key.objectid + cache->key.offset;
10401         }
10402
10403         btrfs_free_path(path);
10404         return 0;
10405 }
10406
10407 static int reset_balance(struct btrfs_trans_handle *trans,
10408                          struct btrfs_fs_info *fs_info)
10409 {
10410         struct btrfs_root *root = fs_info->tree_root;
10411         struct btrfs_path *path;
10412         struct extent_buffer *leaf;
10413         struct btrfs_key key;
10414         int del_slot, del_nr = 0;
10415         int ret;
10416         int found = 0;
10417
10418         path = btrfs_alloc_path();
10419         if (!path)
10420                 return -ENOMEM;
10421
10422         key.objectid = BTRFS_BALANCE_OBJECTID;
10423         key.type = BTRFS_BALANCE_ITEM_KEY;
10424         key.offset = 0;
10425
10426         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10427         if (ret) {
10428                 if (ret > 0)
10429                         ret = 0;
10430                 if (!ret)
10431                         goto reinit_data_reloc;
10432                 else
10433                         goto out;
10434         }
10435
10436         ret = btrfs_del_item(trans, root, path);
10437         if (ret)
10438                 goto out;
10439         btrfs_release_path(path);
10440
10441         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10442         key.type = BTRFS_ROOT_ITEM_KEY;
10443         key.offset = 0;
10444
10445         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10446         if (ret < 0)
10447                 goto out;
10448         while (1) {
10449                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10450                         if (!found)
10451                                 break;
10452
10453                         if (del_nr) {
10454                                 ret = btrfs_del_items(trans, root, path,
10455                                                       del_slot, del_nr);
10456                                 del_nr = 0;
10457                                 if (ret)
10458                                         goto out;
10459                         }
10460                         key.offset++;
10461                         btrfs_release_path(path);
10462
10463                         found = 0;
10464                         ret = btrfs_search_slot(trans, root, &key, path,
10465                                                 -1, 1);
10466                         if (ret < 0)
10467                                 goto out;
10468                         continue;
10469                 }
10470                 found = 1;
10471                 leaf = path->nodes[0];
10472                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10473                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10474                         break;
10475                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10476                         path->slots[0]++;
10477                         continue;
10478                 }
10479                 if (!del_nr) {
10480                         del_slot = path->slots[0];
10481                         del_nr = 1;
10482                 } else {
10483                         del_nr++;
10484                 }
10485                 path->slots[0]++;
10486         }
10487
10488         if (del_nr) {
10489                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10490                 if (ret)
10491                         goto out;
10492         }
10493         btrfs_release_path(path);
10494
10495 reinit_data_reloc:
10496         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10497         key.type = BTRFS_ROOT_ITEM_KEY;
10498         key.offset = (u64)-1;
10499         root = btrfs_read_fs_root(fs_info, &key);
10500         if (IS_ERR(root)) {
10501                 fprintf(stderr, "Error reading data reloc tree\n");
10502                 ret = PTR_ERR(root);
10503                 goto out;
10504         }
10505         record_root_in_trans(trans, root);
10506         ret = btrfs_fsck_reinit_root(trans, root, 0);
10507         if (ret)
10508                 goto out;
10509         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10510 out:
10511         btrfs_free_path(path);
10512         return ret;
10513 }
10514
10515 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10516                               struct btrfs_fs_info *fs_info)
10517 {
10518         u64 start = 0;
10519         int ret;
10520
10521         /*
10522          * The only reason we don't do this is because right now we're just
10523          * walking the trees we find and pinning down their bytes, we don't look
10524          * at any of the leaves.  In order to do mixed groups we'd have to check
10525          * the leaves of any fs roots and pin down the bytes for any file
10526          * extents we find.  Not hard but why do it if we don't have to?
10527          */
10528         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10529                 fprintf(stderr, "We don't support re-initing the extent tree "
10530                         "for mixed block groups yet, please notify a btrfs "
10531                         "developer you want to do this so they can add this "
10532                         "functionality.\n");
10533                 return -EINVAL;
10534         }
10535
10536         /*
10537          * first we need to walk all of the trees except the extent tree and pin
10538          * down the bytes that are in use so we don't overwrite any existing
10539          * metadata.
10540          */
10541         ret = pin_metadata_blocks(fs_info);
10542         if (ret) {
10543                 fprintf(stderr, "error pinning down used bytes\n");
10544                 return ret;
10545         }
10546
10547         /*
10548          * Need to drop all the block groups since we're going to recreate all
10549          * of them again.
10550          */
10551         btrfs_free_block_groups(fs_info);
10552         ret = reset_block_groups(fs_info);
10553         if (ret) {
10554                 fprintf(stderr, "error resetting the block groups\n");
10555                 return ret;
10556         }
10557
10558         /* Ok we can allocate now, reinit the extent root */
10559         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10560         if (ret) {
10561                 fprintf(stderr, "extent root initialization failed\n");
10562                 /*
10563                  * When the transaction code is updated we should end the
10564                  * transaction, but for now progs only knows about commit so
10565                  * just return an error.
10566                  */
10567                 return ret;
10568         }
10569
10570         /*
10571          * Now we have all the in-memory block groups setup so we can make
10572          * allocations properly, and the metadata we care about is safe since we
10573          * pinned all of it above.
10574          */
10575         while (1) {
10576                 struct btrfs_block_group_cache *cache;
10577
10578                 cache = btrfs_lookup_first_block_group(fs_info, start);
10579                 if (!cache)
10580                         break;
10581                 start = cache->key.objectid + cache->key.offset;
10582                 ret = btrfs_insert_item(trans, fs_info->extent_root,
10583                                         &cache->key, &cache->item,
10584                                         sizeof(cache->item));
10585                 if (ret) {
10586                         fprintf(stderr, "Error adding block group\n");
10587                         return ret;
10588                 }
10589                 btrfs_extent_post_op(trans, fs_info->extent_root);
10590         }
10591
10592         ret = reset_balance(trans, fs_info);
10593         if (ret)
10594                 fprintf(stderr, "error resetting the pending balance\n");
10595
10596         return ret;
10597 }
10598
10599 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10600 {
10601         struct btrfs_path *path;
10602         struct btrfs_trans_handle *trans;
10603         struct btrfs_key key;
10604         int ret;
10605
10606         printf("Recowing metadata block %llu\n", eb->start);
10607         key.objectid = btrfs_header_owner(eb);
10608         key.type = BTRFS_ROOT_ITEM_KEY;
10609         key.offset = (u64)-1;
10610
10611         root = btrfs_read_fs_root(root->fs_info, &key);
10612         if (IS_ERR(root)) {
10613                 fprintf(stderr, "Couldn't find owner root %llu\n",
10614                         key.objectid);
10615                 return PTR_ERR(root);
10616         }
10617
10618         path = btrfs_alloc_path();
10619         if (!path)
10620                 return -ENOMEM;
10621
10622         trans = btrfs_start_transaction(root, 1);
10623         if (IS_ERR(trans)) {
10624                 btrfs_free_path(path);
10625                 return PTR_ERR(trans);
10626         }
10627
10628         path->lowest_level = btrfs_header_level(eb);
10629         if (path->lowest_level)
10630                 btrfs_node_key_to_cpu(eb, &key, 0);
10631         else
10632                 btrfs_item_key_to_cpu(eb, &key, 0);
10633
10634         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10635         btrfs_commit_transaction(trans, root);
10636         btrfs_free_path(path);
10637         return ret;
10638 }
10639
10640 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10641 {
10642         struct btrfs_path *path;
10643         struct btrfs_trans_handle *trans;
10644         struct btrfs_key key;
10645         int ret;
10646
10647         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10648                bad->key.type, bad->key.offset);
10649         key.objectid = bad->root_id;
10650         key.type = BTRFS_ROOT_ITEM_KEY;
10651         key.offset = (u64)-1;
10652
10653         root = btrfs_read_fs_root(root->fs_info, &key);
10654         if (IS_ERR(root)) {
10655                 fprintf(stderr, "Couldn't find owner root %llu\n",
10656                         key.objectid);
10657                 return PTR_ERR(root);
10658         }
10659
10660         path = btrfs_alloc_path();
10661         if (!path)
10662                 return -ENOMEM;
10663
10664         trans = btrfs_start_transaction(root, 1);
10665         if (IS_ERR(trans)) {
10666                 btrfs_free_path(path);
10667                 return PTR_ERR(trans);
10668         }
10669
10670         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10671         if (ret) {
10672                 if (ret > 0)
10673                         ret = 0;
10674                 goto out;
10675         }
10676         ret = btrfs_del_item(trans, root, path);
10677 out:
10678         btrfs_commit_transaction(trans, root);
10679         btrfs_free_path(path);
10680         return ret;
10681 }
10682
10683 static int zero_log_tree(struct btrfs_root *root)
10684 {
10685         struct btrfs_trans_handle *trans;
10686         int ret;
10687
10688         trans = btrfs_start_transaction(root, 1);
10689         if (IS_ERR(trans)) {
10690                 ret = PTR_ERR(trans);
10691                 return ret;
10692         }
10693         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10694         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10695         ret = btrfs_commit_transaction(trans, root);
10696         return ret;
10697 }
10698
10699 static int populate_csum(struct btrfs_trans_handle *trans,
10700                          struct btrfs_root *csum_root, char *buf, u64 start,
10701                          u64 len)
10702 {
10703         u64 offset = 0;
10704         u64 sectorsize;
10705         int ret = 0;
10706
10707         while (offset < len) {
10708                 sectorsize = csum_root->sectorsize;
10709                 ret = read_extent_data(csum_root, buf, start + offset,
10710                                        &sectorsize, 0);
10711                 if (ret)
10712                         break;
10713                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10714                                             start + offset, buf, sectorsize);
10715                 if (ret)
10716                         break;
10717                 offset += sectorsize;
10718         }
10719         return ret;
10720 }
10721
10722 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10723                                       struct btrfs_root *csum_root,
10724                                       struct btrfs_root *cur_root)
10725 {
10726         struct btrfs_path *path;
10727         struct btrfs_key key;
10728         struct extent_buffer *node;
10729         struct btrfs_file_extent_item *fi;
10730         char *buf = NULL;
10731         u64 start = 0;
10732         u64 len = 0;
10733         int slot = 0;
10734         int ret = 0;
10735
10736         path = btrfs_alloc_path();
10737         if (!path)
10738                 return -ENOMEM;
10739         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10740         if (!buf) {
10741                 ret = -ENOMEM;
10742                 goto out;
10743         }
10744
10745         key.objectid = 0;
10746         key.offset = 0;
10747         key.type = 0;
10748
10749         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10750         if (ret < 0)
10751                 goto out;
10752         /* Iterate all regular file extents and fill its csum */
10753         while (1) {
10754                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10755
10756                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10757                         goto next;
10758                 node = path->nodes[0];
10759                 slot = path->slots[0];
10760                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10761                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10762                         goto next;
10763                 start = btrfs_file_extent_disk_bytenr(node, fi);
10764                 len = btrfs_file_extent_disk_num_bytes(node, fi);
10765
10766                 ret = populate_csum(trans, csum_root, buf, start, len);
10767                 if (ret == -EEXIST)
10768                         ret = 0;
10769                 if (ret < 0)
10770                         goto out;
10771 next:
10772                 /*
10773                  * TODO: if next leaf is corrupted, jump to nearest next valid
10774                  * leaf.
10775                  */
10776                 ret = btrfs_next_item(cur_root, path);
10777                 if (ret < 0)
10778                         goto out;
10779                 if (ret > 0) {
10780                         ret = 0;
10781                         goto out;
10782                 }
10783         }
10784
10785 out:
10786         btrfs_free_path(path);
10787         free(buf);
10788         return ret;
10789 }
10790
10791 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10792                                   struct btrfs_root *csum_root)
10793 {
10794         struct btrfs_fs_info *fs_info = csum_root->fs_info;
10795         struct btrfs_path *path;
10796         struct btrfs_root *tree_root = fs_info->tree_root;
10797         struct btrfs_root *cur_root;
10798         struct extent_buffer *node;
10799         struct btrfs_key key;
10800         int slot = 0;
10801         int ret = 0;
10802
10803         path = btrfs_alloc_path();
10804         if (!path)
10805                 return -ENOMEM;
10806
10807         key.objectid = BTRFS_FS_TREE_OBJECTID;
10808         key.offset = 0;
10809         key.type = BTRFS_ROOT_ITEM_KEY;
10810
10811         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10812         if (ret < 0)
10813                 goto out;
10814         if (ret > 0) {
10815                 ret = -ENOENT;
10816                 goto out;
10817         }
10818
10819         while (1) {
10820                 node = path->nodes[0];
10821                 slot = path->slots[0];
10822                 btrfs_item_key_to_cpu(node, &key, slot);
10823                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10824                         goto out;
10825                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10826                         goto next;
10827                 if (!is_fstree(key.objectid))
10828                         goto next;
10829                 key.offset = (u64)-1;
10830
10831                 cur_root = btrfs_read_fs_root(fs_info, &key);
10832                 if (IS_ERR(cur_root) || !cur_root) {
10833                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10834                                 key.objectid);
10835                         goto out;
10836                 }
10837                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10838                                 cur_root);
10839                 if (ret < 0)
10840                         goto out;
10841 next:
10842                 ret = btrfs_next_item(tree_root, path);
10843                 if (ret > 0) {
10844                         ret = 0;
10845                         goto out;
10846                 }
10847                 if (ret < 0)
10848                         goto out;
10849         }
10850
10851 out:
10852         btrfs_free_path(path);
10853         return ret;
10854 }
10855
10856 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10857                                       struct btrfs_root *csum_root)
10858 {
10859         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10860         struct btrfs_path *path;
10861         struct btrfs_extent_item *ei;
10862         struct extent_buffer *leaf;
10863         char *buf;
10864         struct btrfs_key key;
10865         int ret;
10866
10867         path = btrfs_alloc_path();
10868         if (!path)
10869                 return -ENOMEM;
10870
10871         key.objectid = 0;
10872         key.type = BTRFS_EXTENT_ITEM_KEY;
10873         key.offset = 0;
10874
10875         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10876         if (ret < 0) {
10877                 btrfs_free_path(path);
10878                 return ret;
10879         }
10880
10881         buf = malloc(csum_root->sectorsize);
10882         if (!buf) {
10883                 btrfs_free_path(path);
10884                 return -ENOMEM;
10885         }
10886
10887         while (1) {
10888                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10889                         ret = btrfs_next_leaf(extent_root, path);
10890                         if (ret < 0)
10891                                 break;
10892                         if (ret) {
10893                                 ret = 0;
10894                                 break;
10895                         }
10896                 }
10897                 leaf = path->nodes[0];
10898
10899                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10900                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10901                         path->slots[0]++;
10902                         continue;
10903                 }
10904
10905                 ei = btrfs_item_ptr(leaf, path->slots[0],
10906                                     struct btrfs_extent_item);
10907                 if (!(btrfs_extent_flags(leaf, ei) &
10908                       BTRFS_EXTENT_FLAG_DATA)) {
10909                         path->slots[0]++;
10910                         continue;
10911                 }
10912
10913                 ret = populate_csum(trans, csum_root, buf, key.objectid,
10914                                     key.offset);
10915                 if (ret)
10916                         break;
10917                 path->slots[0]++;
10918         }
10919
10920         btrfs_free_path(path);
10921         free(buf);
10922         return ret;
10923 }
10924
/*
 * Recompute data checksums and insert them into @csum_root.
 *
 * When the extent tree has just been re-initialized it no longer holds any
 * extent information, so it cannot be used as the source.  In that case the
 * caller sets @search_fs_tree and the fs/subvolume trees are walked instead;
 * otherwise the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
10941
10942 static void free_roots_info_cache(void)
10943 {
10944         if (!roots_info_cache)
10945                 return;
10946
10947         while (!cache_tree_empty(roots_info_cache)) {
10948                 struct cache_extent *entry;
10949                 struct root_item_info *rii;
10950
10951                 entry = first_cache_extent(roots_info_cache);
10952                 if (!entry)
10953                         break;
10954                 remove_cache_extent(roots_info_cache, entry);
10955                 rii = container_of(entry, struct root_item_info, cache_extent);
10956                 free(rii);
10957         }
10958
10959         free(roots_info_cache);
10960         roots_info_cache = NULL;
10961 }
10962
10963 static int build_roots_info_cache(struct btrfs_fs_info *info)
10964 {
10965         int ret = 0;
10966         struct btrfs_key key;
10967         struct extent_buffer *leaf;
10968         struct btrfs_path *path;
10969
10970         if (!roots_info_cache) {
10971                 roots_info_cache = malloc(sizeof(*roots_info_cache));
10972                 if (!roots_info_cache)
10973                         return -ENOMEM;
10974                 cache_tree_init(roots_info_cache);
10975         }
10976
10977         path = btrfs_alloc_path();
10978         if (!path)
10979                 return -ENOMEM;
10980
10981         key.objectid = 0;
10982         key.type = BTRFS_EXTENT_ITEM_KEY;
10983         key.offset = 0;
10984
10985         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10986         if (ret < 0)
10987                 goto out;
10988         leaf = path->nodes[0];
10989
10990         while (1) {
10991                 struct btrfs_key found_key;
10992                 struct btrfs_extent_item *ei;
10993                 struct btrfs_extent_inline_ref *iref;
10994                 int slot = path->slots[0];
10995                 int type;
10996                 u64 flags;
10997                 u64 root_id;
10998                 u8 level;
10999                 struct cache_extent *entry;
11000                 struct root_item_info *rii;
11001
11002                 if (slot >= btrfs_header_nritems(leaf)) {
11003                         ret = btrfs_next_leaf(info->extent_root, path);
11004                         if (ret < 0) {
11005                                 break;
11006                         } else if (ret) {
11007                                 ret = 0;
11008                                 break;
11009                         }
11010                         leaf = path->nodes[0];
11011                         slot = path->slots[0];
11012                 }
11013
11014                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
11015
11016                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
11017                     found_key.type != BTRFS_METADATA_ITEM_KEY)
11018                         goto next;
11019
11020                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11021                 flags = btrfs_extent_flags(leaf, ei);
11022
11023                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
11024                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
11025                         goto next;
11026
11027                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
11028                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11029                         level = found_key.offset;
11030                 } else {
11031                         struct btrfs_tree_block_info *binfo;
11032
11033                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
11034                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
11035                         level = btrfs_tree_block_level(leaf, binfo);
11036                 }
11037
11038                 /*
11039                  * For a root extent, it must be of the following type and the
11040                  * first (and only one) iref in the item.
11041                  */
11042                 type = btrfs_extent_inline_ref_type(leaf, iref);
11043                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
11044                         goto next;
11045
11046                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
11047                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11048                 if (!entry) {
11049                         rii = malloc(sizeof(struct root_item_info));
11050                         if (!rii) {
11051                                 ret = -ENOMEM;
11052                                 goto out;
11053                         }
11054                         rii->cache_extent.start = root_id;
11055                         rii->cache_extent.size = 1;
11056                         rii->level = (u8)-1;
11057                         entry = &rii->cache_extent;
11058                         ret = insert_cache_extent(roots_info_cache, entry);
11059                         ASSERT(ret == 0);
11060                 } else {
11061                         rii = container_of(entry, struct root_item_info,
11062                                            cache_extent);
11063                 }
11064
11065                 ASSERT(rii->cache_extent.start == root_id);
11066                 ASSERT(rii->cache_extent.size == 1);
11067
11068                 if (level > rii->level || rii->level == (u8)-1) {
11069                         rii->level = level;
11070                         rii->bytenr = found_key.objectid;
11071                         rii->gen = btrfs_extent_generation(leaf, ei);
11072                         rii->node_count = 1;
11073                 } else if (level == rii->level) {
11074                         rii->node_count++;
11075                 }
11076 next:
11077                 path->slots[0]++;
11078         }
11079
11080 out:
11081         btrfs_free_path(path);
11082
11083         return ret;
11084 }
11085
11086 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11087                                   struct btrfs_path *path,
11088                                   const struct btrfs_key *root_key,
11089                                   const int read_only_mode)
11090 {
11091         const u64 root_id = root_key->objectid;
11092         struct cache_extent *entry;
11093         struct root_item_info *rii;
11094         struct btrfs_root_item ri;
11095         unsigned long offset;
11096
11097         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11098         if (!entry) {
11099                 fprintf(stderr,
11100                         "Error: could not find extent items for root %llu\n",
11101                         root_key->objectid);
11102                 return -ENOENT;
11103         }
11104
11105         rii = container_of(entry, struct root_item_info, cache_extent);
11106         ASSERT(rii->cache_extent.start == root_id);
11107         ASSERT(rii->cache_extent.size == 1);
11108
11109         if (rii->node_count != 1) {
11110                 fprintf(stderr,
11111                         "Error: could not find btree root extent for root %llu\n",
11112                         root_id);
11113                 return -ENOENT;
11114         }
11115
11116         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11117         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11118
11119         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11120             btrfs_root_level(&ri) != rii->level ||
11121             btrfs_root_generation(&ri) != rii->gen) {
11122
11123                 /*
11124                  * If we're in repair mode but our caller told us to not update
11125                  * the root item, i.e. just check if it needs to be updated, don't
11126                  * print this message, since the caller will call us again shortly
11127                  * for the same root item without read only mode (the caller will
11128                  * open a transaction first).
11129                  */
11130                 if (!(read_only_mode && repair))
11131                         fprintf(stderr,
11132                                 "%sroot item for root %llu,"
11133                                 " current bytenr %llu, current gen %llu, current level %u,"
11134                                 " new bytenr %llu, new gen %llu, new level %u\n",
11135                                 (read_only_mode ? "" : "fixing "),
11136                                 root_id,
11137                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11138                                 btrfs_root_level(&ri),
11139                                 rii->bytenr, rii->gen, rii->level);
11140
11141                 if (btrfs_root_generation(&ri) > rii->gen) {
11142                         fprintf(stderr,
11143                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11144                                 root_id, btrfs_root_generation(&ri), rii->gen);
11145                         return -EINVAL;
11146                 }
11147
11148                 if (!read_only_mode) {
11149                         btrfs_set_root_bytenr(&ri, rii->bytenr);
11150                         btrfs_set_root_level(&ri, rii->level);
11151                         btrfs_set_root_generation(&ri, rii->gen);
11152                         write_extent_buffer(path->nodes[0], &ri,
11153                                             offset, sizeof(ri));
11154                 }
11155
11156                 return 1;
11157         }
11158
11159         return 0;
11160 }
11161
11162 /*
11163  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11164  * caused read-only snapshots to be corrupted if they were created at a moment
11165  * when the source subvolume/snapshot had orphan items. The issue was that the
11166  * on-disk root items became incorrect, referring to the pre orphan cleanup root
11167  * node instead of the post orphan cleanup root node.
11168  * So this function, and its callees, just detects and fixes those cases. Even
11169  * though the regression was for read-only snapshots, this function applies to
11170  * any snapshot/subvolume root.
11171  * This must be run before any other repair code - not doing it so, makes other
11172  * repair code delete or modify backrefs in the extent tree for example, which
11173  * will result in an inconsistent fs after repairing the root items.
11174  */
11175 static int repair_root_items(struct btrfs_fs_info *info)
11176 {
11177         struct btrfs_path *path = NULL;
11178         struct btrfs_key key;
11179         struct extent_buffer *leaf;
11180         struct btrfs_trans_handle *trans = NULL;
11181         int ret = 0;
11182         int bad_roots = 0;
11183         int need_trans = 0;
11184
11185         ret = build_roots_info_cache(info);
11186         if (ret)
11187                 goto out;
11188
11189         path = btrfs_alloc_path();
11190         if (!path) {
11191                 ret = -ENOMEM;
11192                 goto out;
11193         }
11194
11195         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11196         key.type = BTRFS_ROOT_ITEM_KEY;
11197         key.offset = 0;
11198
11199 again:
11200         /*
11201          * Avoid opening and committing transactions if a leaf doesn't have
11202          * any root items that need to be fixed, so that we avoid rotating
11203          * backup roots unnecessarily.
11204          */
11205         if (need_trans) {
11206                 trans = btrfs_start_transaction(info->tree_root, 1);
11207                 if (IS_ERR(trans)) {
11208                         ret = PTR_ERR(trans);
11209                         goto out;
11210                 }
11211         }
11212
11213         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11214                                 0, trans ? 1 : 0);
11215         if (ret < 0)
11216                 goto out;
11217         leaf = path->nodes[0];
11218
11219         while (1) {
11220                 struct btrfs_key found_key;
11221
11222                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11223                         int no_more_keys = find_next_key(path, &key);
11224
11225                         btrfs_release_path(path);
11226                         if (trans) {
11227                                 ret = btrfs_commit_transaction(trans,
11228                                                                info->tree_root);
11229                                 trans = NULL;
11230                                 if (ret < 0)
11231                                         goto out;
11232                         }
11233                         need_trans = 0;
11234                         if (no_more_keys)
11235                                 break;
11236                         goto again;
11237                 }
11238
11239                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
11240
11241                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11242                         goto next;
11243                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11244                         goto next;
11245
11246                 ret = maybe_repair_root_item(info, path, &found_key,
11247                                              trans ? 0 : 1);
11248                 if (ret < 0)
11249                         goto out;
11250                 if (ret) {
11251                         if (!trans && repair) {
11252                                 need_trans = 1;
11253                                 key = found_key;
11254                                 btrfs_release_path(path);
11255                                 goto again;
11256                         }
11257                         bad_roots++;
11258                 }
11259 next:
11260                 path->slots[0]++;
11261         }
11262         ret = 0;
11263 out:
11264         free_roots_info_cache();
11265         btrfs_free_path(path);
11266         if (trans)
11267                 btrfs_commit_transaction(trans, info->tree_root);
11268         if (ret < 0)
11269                 return ret;
11270
11271         return bad_roots;
11272 }
11273
/*
 * Help text for "btrfs check": a NULL-terminated array of strings holding the
 * synopsis line, a one-line summary, a longer description and, after the
 * empty string, the lines describing each supported option.
 * NOTE(review): presumably consumed by the generic usage printer of the btrfs
 * command framework - confirm against commands.h.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               select mode, allows to make some memory/IO",
        "                            trade-offs, where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report           print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        NULL
};
11303
11304 int cmd_check(int argc, char **argv)
11305 {
11306         struct cache_tree root_cache;
11307         struct btrfs_root *root;
11308         struct btrfs_fs_info *info;
11309         u64 bytenr = 0;
11310         u64 subvolid = 0;
11311         u64 tree_root_bytenr = 0;
11312         u64 chunk_root_bytenr = 0;
11313         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11314         int ret;
11315         u64 num;
11316         int init_csum_tree = 0;
11317         int readonly = 0;
11318         int qgroup_report = 0;
11319         int qgroups_repaired = 0;
11320         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11321
11322         while(1) {
11323                 int c;
11324                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11325                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11326                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11327                         GETOPT_VAL_MODE };
11328                 static const struct option long_options[] = {
11329                         { "super", required_argument, NULL, 's' },
11330                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11331                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11332                         { "init-csum-tree", no_argument, NULL,
11333                                 GETOPT_VAL_INIT_CSUM },
11334                         { "init-extent-tree", no_argument, NULL,
11335                                 GETOPT_VAL_INIT_EXTENT },
11336                         { "check-data-csum", no_argument, NULL,
11337                                 GETOPT_VAL_CHECK_CSUM },
11338                         { "backup", no_argument, NULL, 'b' },
11339                         { "subvol-extents", required_argument, NULL, 'E' },
11340                         { "qgroup-report", no_argument, NULL, 'Q' },
11341                         { "tree-root", required_argument, NULL, 'r' },
11342                         { "chunk-root", required_argument, NULL,
11343                                 GETOPT_VAL_CHUNK_TREE },
11344                         { "progress", no_argument, NULL, 'p' },
11345                         { "mode", required_argument, NULL,
11346                                 GETOPT_VAL_MODE },
11347                         { NULL, 0, NULL, 0}
11348                 };
11349
11350                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11351                 if (c < 0)
11352                         break;
11353                 switch(c) {
11354                         case 'a': /* ignored */ break;
11355                         case 'b':
11356                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11357                                 break;
11358                         case 's':
11359                                 num = arg_strtou64(optarg);
11360                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11361                                         fprintf(stderr,
11362                                                 "ERROR: super mirror should be less than: %d\n",
11363                                                 BTRFS_SUPER_MIRROR_MAX);
11364                                         exit(1);
11365                                 }
11366                                 bytenr = btrfs_sb_offset(((int)num));
11367                                 printf("using SB copy %llu, bytenr %llu\n", num,
11368                                        (unsigned long long)bytenr);
11369                                 break;
11370                         case 'Q':
11371                                 qgroup_report = 1;
11372                                 break;
11373                         case 'E':
11374                                 subvolid = arg_strtou64(optarg);
11375                                 break;
11376                         case 'r':
11377                                 tree_root_bytenr = arg_strtou64(optarg);
11378                                 break;
11379                         case GETOPT_VAL_CHUNK_TREE:
11380                                 chunk_root_bytenr = arg_strtou64(optarg);
11381                                 break;
11382                         case 'p':
11383                                 ctx.progress_enabled = true;
11384                                 break;
11385                         case '?':
11386                         case 'h':
11387                                 usage(cmd_check_usage);
11388                         case GETOPT_VAL_REPAIR:
11389                                 printf("enabling repair mode\n");
11390                                 repair = 1;
11391                                 ctree_flags |= OPEN_CTREE_WRITES;
11392                                 break;
11393                         case GETOPT_VAL_READONLY:
11394                                 readonly = 1;
11395                                 break;
11396                         case GETOPT_VAL_INIT_CSUM:
11397                                 printf("Creating a new CRC tree\n");
11398                                 init_csum_tree = 1;
11399                                 repair = 1;
11400                                 ctree_flags |= OPEN_CTREE_WRITES;
11401                                 break;
11402                         case GETOPT_VAL_INIT_EXTENT:
11403                                 init_extent_tree = 1;
11404                                 ctree_flags |= (OPEN_CTREE_WRITES |
11405                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
11406                                 repair = 1;
11407                                 break;
11408                         case GETOPT_VAL_CHECK_CSUM:
11409                                 check_data_csum = 1;
11410                                 break;
11411                         case GETOPT_VAL_MODE:
11412                                 check_mode = parse_check_mode(optarg);
11413                                 if (check_mode == CHECK_MODE_UNKNOWN) {
11414                                         error("unknown mode: %s", optarg);
11415                                         exit(1);
11416                                 }
11417                                 break;
11418                 }
11419         }
11420
11421         if (check_argc_exact(argc - optind, 1))
11422                 usage(cmd_check_usage);
11423
11424         if (ctx.progress_enabled) {
11425                 ctx.tp = TASK_NOTHING;
11426                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11427         }
11428
11429         /* This check is the only reason for --readonly to exist */
11430         if (readonly && repair) {
11431                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
11432                 exit(1);
11433         }
11434
11435         /*
11436          * Not supported yet
11437          */
11438         if (repair && check_mode == CHECK_MODE_LOWMEM) {
11439                 error("Low memory mode doesn't support repair yet");
11440                 exit(1);
11441         }
11442
11443         radix_tree_init();
11444         cache_tree_init(&root_cache);
11445
11446         if((ret = check_mounted(argv[optind])) < 0) {
11447                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
11448                 goto err_out;
11449         } else if(ret) {
11450                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
11451                 ret = -EBUSY;
11452                 goto err_out;
11453         }
11454
11455         /* only allow partial opening under repair mode */
11456         if (repair)
11457                 ctree_flags |= OPEN_CTREE_PARTIAL;
11458
11459         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11460                                   chunk_root_bytenr, ctree_flags);
11461         if (!info) {
11462                 fprintf(stderr, "Couldn't open file system\n");
11463                 ret = -EIO;
11464                 goto err_out;
11465         }
11466
11467         global_info = info;
11468         root = info->fs_root;
11469
11470         /*
11471          * repair mode will force us to commit transaction which
11472          * will make us fail to load log tree when mounting.
11473          */
11474         if (repair && btrfs_super_log_root(info->super_copy)) {
11475                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
11476                 if (!ret) {
11477                         ret = 1;
11478                         goto close_out;
11479                 }
11480                 ret = zero_log_tree(root);
11481                 if (ret) {
11482                         fprintf(stderr, "fail to zero log tree\n");
11483                         goto close_out;
11484                 }
11485         }
11486
11487         uuid_unparse(info->super_copy->fsid, uuidbuf);
11488         if (qgroup_report) {
11489                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11490                        uuidbuf);
11491                 ret = qgroup_verify_all(info);
11492                 if (ret == 0)
11493                         report_qgroups(1);
11494                 goto close_out;
11495         }
11496         if (subvolid) {
11497                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11498                        subvolid, argv[optind], uuidbuf);
11499                 ret = print_extent_state(info, subvolid);
11500                 goto close_out;
11501         }
11502         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11503
11504         if (!extent_buffer_uptodate(info->tree_root->node) ||
11505             !extent_buffer_uptodate(info->dev_root->node) ||
11506             !extent_buffer_uptodate(info->chunk_root->node)) {
11507                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11508                 ret = -EIO;
11509                 goto close_out;
11510         }
11511
11512         if (init_extent_tree || init_csum_tree) {
11513                 struct btrfs_trans_handle *trans;
11514
11515                 trans = btrfs_start_transaction(info->extent_root, 0);
11516                 if (IS_ERR(trans)) {
11517                         fprintf(stderr, "Error starting transaction\n");
11518                         ret = PTR_ERR(trans);
11519                         goto close_out;
11520                 }
11521
11522                 if (init_extent_tree) {
11523                         printf("Creating a new extent tree\n");
11524                         ret = reinit_extent_tree(trans, info);
11525                         if (ret)
11526                                 goto close_out;
11527                 }
11528
11529                 if (init_csum_tree) {
11530                         fprintf(stderr, "Reinit crc root\n");
11531                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11532                         if (ret) {
11533                                 fprintf(stderr, "crc root initialization failed\n");
11534                                 ret = -EIO;
11535                                 goto close_out;
11536                         }
11537
11538                         ret = fill_csum_tree(trans, info->csum_root,
11539                                              init_extent_tree);
11540                         if (ret) {
11541                                 fprintf(stderr, "crc refilling failed\n");
11542                                 return -EIO;
11543                         }
11544                 }
11545                 /*
11546                  * Ok now we commit and run the normal fsck, which will add
11547                  * extent entries for all of the items it finds.
11548                  */
11549                 ret = btrfs_commit_transaction(trans, info->extent_root);
11550                 if (ret)
11551                         goto close_out;
11552         }
11553         if (!extent_buffer_uptodate(info->extent_root->node)) {
11554                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11555                 ret = -EIO;
11556                 goto close_out;
11557         }
11558         if (!extent_buffer_uptodate(info->csum_root->node)) {
11559                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
11560                 ret = -EIO;
11561                 goto close_out;
11562         }
11563
11564         if (!ctx.progress_enabled)
11565                 fprintf(stderr, "checking extents\n");
11566         if (check_mode == CHECK_MODE_LOWMEM)
11567                 ret = check_chunks_and_extents_v2(root);
11568         else
11569                 ret = check_chunks_and_extents(root);
11570         if (ret)
11571                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
11572
11573         ret = repair_root_items(info);
11574         if (ret < 0)
11575                 goto close_out;
11576         if (repair) {
11577                 fprintf(stderr, "Fixed %d roots.\n", ret);
11578                 ret = 0;
11579         } else if (ret > 0) {
11580                 fprintf(stderr,
11581                        "Found %d roots with an outdated root item.\n",
11582                        ret);
11583                 fprintf(stderr,
11584                         "Please run a filesystem check with the option --repair to fix them.\n");
11585                 ret = 1;
11586                 goto close_out;
11587         }
11588
11589         if (!ctx.progress_enabled) {
11590                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11591                         fprintf(stderr, "checking free space tree\n");
11592                 else
11593                         fprintf(stderr, "checking free space cache\n");
11594         }
11595         ret = check_space_cache(root);
11596         if (ret)
11597                 goto out;
11598
11599         /*
11600          * We used to have to have these hole extents in between our real
11601          * extents so if we don't have this flag set we need to make sure there
11602          * are no gaps in the file extents for inodes, otherwise we can just
11603          * ignore it when this happens.
11604          */
11605         no_holes = btrfs_fs_incompat(root->fs_info,
11606                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11607         if (!ctx.progress_enabled)
11608                 fprintf(stderr, "checking fs roots\n");
11609         ret = check_fs_roots(root, &root_cache);
11610         if (ret)
11611                 goto out;
11612
11613         fprintf(stderr, "checking csums\n");
11614         ret = check_csums(root);
11615         if (ret)
11616                 goto out;
11617
11618         fprintf(stderr, "checking root refs\n");
11619         ret = check_root_refs(root, &root_cache);
11620         if (ret)
11621                 goto out;
11622
11623         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11624                 struct extent_buffer *eb;
11625
11626                 eb = list_first_entry(&root->fs_info->recow_ebs,
11627                                       struct extent_buffer, recow);
11628                 list_del_init(&eb->recow);
11629                 ret = recow_extent_buffer(root, eb);
11630                 if (ret)
11631                         break;
11632         }
11633
11634         while (!list_empty(&delete_items)) {
11635                 struct bad_item *bad;
11636
11637                 bad = list_first_entry(&delete_items, struct bad_item, list);
11638                 list_del_init(&bad->list);
11639                 if (repair)
11640                         ret = delete_bad_item(root, bad);
11641                 free(bad);
11642         }
11643
11644         if (info->quota_enabled) {
11645                 int err;
11646                 fprintf(stderr, "checking quota groups\n");
11647                 err = qgroup_verify_all(info);
11648                 if (err)
11649                         goto out;
11650                 report_qgroups(0);
11651                 err = repair_qgroups(info, &qgroups_repaired);
11652                 if (err)
11653                         goto out;
11654         }
11655
11656         if (!list_empty(&root->fs_info->recow_ebs)) {
11657                 fprintf(stderr, "Transid errors in file system\n");
11658                 ret = 1;
11659         }
11660 out:
11661         /* Don't override original ret */
11662         if (!ret && qgroups_repaired)
11663                 ret = qgroups_repaired;
11664
11665         if (found_old_backref) { /*
11666                  * there was a disk format change when mixed
11667                  * backref was in testing tree. The old format
11668                  * existed about one week.
11669                  */
11670                 printf("\n * Found old mixed backref format. "
11671                        "The old format is not supported! *"
11672                        "\n * Please mount the FS in readonly mode, "
11673                        "backup data and re-format the FS. *\n\n");
11674                 ret = 1;
11675         }
11676         printf("found %llu bytes used err is %d\n",
11677                (unsigned long long)bytes_used, ret);
11678         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11679         printf("total tree bytes: %llu\n",
11680                (unsigned long long)total_btree_bytes);
11681         printf("total fs tree bytes: %llu\n",
11682                (unsigned long long)total_fs_tree_bytes);
11683         printf("total extent tree bytes: %llu\n",
11684                (unsigned long long)total_extent_tree_bytes);
11685         printf("btree space waste bytes: %llu\n",
11686                (unsigned long long)btree_space_waste);
11687         printf("file data blocks allocated: %llu\n referenced %llu\n",
11688                 (unsigned long long)data_bytes_allocated,
11689                 (unsigned long long)data_bytes_referenced);
11690
11691         free_qgroup_counts();
11692         free_root_recs_tree(&root_cache);
11693 close_out:
11694         close_ctree(root);
11695 err_out:
11696         if (ctx.progress_enabled)
11697                 task_deinit(ctx.info);
11698
11699         return ret;
11700 }