Merge branch 'for-jens' of git://git.drbd.org/linux-drbd into for-linus
[platform/adaptation/renesas_rcar/renesas_kernel.git] / fs / btrfs / check-integrity.c
1 /*
2  * Copyright (C) STRATO AG 2011.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 /*
20  * This module can be used to catch cases when the btrfs kernel
21  * code executes write requests to the disk that bring the file
22  * system into an inconsistent state. In such a state, a power-loss
23  * or kernel panic event would cause the data on disk to be
24  * lost or at least damaged.
25  *
26  * Code is added that examines all block write requests during
27  * runtime (including writes of the super block). Three rules
28  * are verified and an error is printed on violation of the
29  * rules:
30  * 1. It is not allowed to write a disk block which is
31  *    currently referenced by the super block (either directly
32  *    or indirectly).
33  * 2. When a super block is written, it is verified that all
34  *    referenced (directly or indirectly) blocks fulfill the
35  *    following requirements:
36  *    2a. All referenced blocks have either been present when
37  *        the file system was mounted, (i.e., they have been
38  *        referenced by the super block) or they have been
39  *        written since then and the write completion callback
40  *        was called and a FLUSH request to the device where
41  *        these blocks are located was received and completed.
42  *    2b. All referenced blocks need to have a generation
43  *        number which is equal to the parent's number.
44  *
45  * One issue that was found using this module was that the log
46  * tree on disk became temporarily corrupted because disk blocks
47  * that had been in use for the log tree had been freed and
48  * reused too early, while being referenced by the written super
49  * block.
50  *
51  * The search term in the kernel log that can be used to filter
52  * on the existence of detected integrity issues is
53  * "btrfs: attempt".
54  *
55  * The integrity check is enabled via mount options. These
56  * mount options are only supported if the integrity check
57  * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY.
58  *
59  * Example #1, apply integrity checks to all metadata:
60  * mount /dev/sdb1 /mnt -o check_int
61  *
62  * Example #2, apply integrity checks to all metadata and
63  * to data extents:
64  * mount /dev/sdb1 /mnt -o check_int_data
65  *
66  * Example #3, apply integrity checks to all metadata and dump
67  * the tree that the super block references to kernel messages
68  * each time after a super block was written:
69  * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263
70  *
71  * If the integrity check tool is included and activated in
72  * the mount options, plenty of kernel memory is used, and
73  * plenty of additional CPU cycles are spent. Enabling this
74  * functionality is not intended for normal use. In most
75  * cases, unless you are a btrfs developer who needs to verify
76  * the integrity of (super)-block write requests, do not
77  * enable the config option BTRFS_FS_CHECK_INTEGRITY to
78  * include and compile the integrity check tool.
79  */
80
81 #include <linux/sched.h>
82 #include <linux/slab.h>
83 #include <linux/buffer_head.h>
84 #include <linux/mutex.h>
85 #include <linux/crc32c.h>
86 #include <linux/genhd.h>
87 #include <linux/blkdev.h>
88 #include "ctree.h"
89 #include "disk-io.h"
90 #include "transaction.h"
91 #include "extent_io.h"
92 #include "volumes.h"
93 #include "print-tree.h"
94 #include "locking.h"
95 #include "check-integrity.h"
96 #include "rcu-string.h"
97
/*
 * Bucket counts of the lookup hashtables. The hash functions in this
 * file mask the hash value with (SIZE - 1), so each count must stay a
 * power of two.
 */
#define BTRFSIC_BLOCK_HASHTABLE_SIZE		0x10000
#define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE	0x10000
#define BTRFSIC_DEV2STATE_HASHTABLE_SIZE	0x100

/* Type tags stored in the magic/magic_num member of the objects below. */
#define BTRFSIC_BLOCK_MAGIC_NUMBER		0x14491051
#define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER		0x11070807
#define BTRFSIC_DEV2STATE_MAGIC_NUMBER		0x20111530
#define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER	20111300

/* Limit for the tree dump, in characters, excluding " [...]". */
#define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL	(200 - 6)

/* Generation value of a block whose generation has not been set yet. */
#define BTRFSIC_GENERATION_UNKNOWN		((u64)-1)
108
/*
 * Bit assignments of the print_mask. The mask is supplied with the
 * mount option check_int_print_mask; each bit enables one category of
 * kernel log output.
 */
#define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE			0x00000001
#define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION		0x00000002
#define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE			0x00000004
#define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE			0x00000008

#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH			0x00000010
#define BTRFSIC_PRINT_MASK_END_IO_BIO_BH			0x00000020
#define BTRFSIC_PRINT_MASK_VERBOSE				0x00000040
#define BTRFSIC_PRINT_MASK_VERY_VERBOSE				0x00000080

#define BTRFSIC_PRINT_MASK_INITIAL_TREE				0x00000100
#define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES			0x00000200
#define BTRFSIC_PRINT_MASK_INITIAL_DATABASE			0x00000400
#define BTRFSIC_PRINT_MASK_NUM_COPIES				0x00000800

#define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS		0x00001000
126
127 struct btrfsic_dev_state;
128 struct btrfsic_state;
129
130 struct btrfsic_block {
131         u32 magic_num;          /* only used for debug purposes */
132         unsigned int is_metadata:1;     /* if it is meta-data, not data-data */
133         unsigned int is_superblock:1;   /* if it is one of the superblocks */
134         unsigned int is_iodone:1;       /* if is done by lower subsystem */
135         unsigned int iodone_w_error:1;  /* error was indicated to endio */
136         unsigned int never_written:1;   /* block was added because it was
137                                          * referenced, not because it was
138                                          * written */
139         unsigned int mirror_num:2;      /* large enough to hold
140                                          * BTRFS_SUPER_MIRROR_MAX */
141         struct btrfsic_dev_state *dev_state;
142         u64 dev_bytenr;         /* key, physical byte num on disk */
143         u64 logical_bytenr;     /* logical byte num on disk */
144         u64 generation;
145         struct btrfs_disk_key disk_key; /* extra info to print in case of
146                                          * issues, will not always be correct */
147         struct list_head collision_resolving_node;      /* list node */
148         struct list_head all_blocks_node;       /* list node */
149
150         /* the following two lists contain block_link items */
151         struct list_head ref_to_list;   /* list */
152         struct list_head ref_from_list; /* list */
153         struct btrfsic_block *next_in_same_bio;
154         void *orig_bio_bh_private;
155         union {
156                 bio_end_io_t *bio;
157                 bh_end_io_t *bh;
158         } orig_bio_bh_end_io;
159         int submit_bio_bh_rw;
160         u64 flush_gen; /* only valid if !never_written */
161 };
162
163 /*
164  * Elements of this type are allocated dynamically and required because
165  * each block object can refer to and can be ref from multiple blocks.
166  * The key to lookup them in the hashtable is the dev_bytenr of
167  * the block ref to plus the one from the block refered from.
168  * The fact that they are searchable via a hashtable and that a
169  * ref_cnt is maintained is not required for the btrfs integrity
170  * check algorithm itself, it is only used to make the output more
171  * beautiful in case that an error is detected (an error is defined
172  * as a write operation to a block while that block is still referenced).
173  */
174 struct btrfsic_block_link {
175         u32 magic_num;          /* only used for debug purposes */
176         u32 ref_cnt;
177         struct list_head node_ref_to;   /* list node */
178         struct list_head node_ref_from; /* list node */
179         struct list_head collision_resolving_node;      /* list node */
180         struct btrfsic_block *block_ref_to;
181         struct btrfsic_block *block_ref_from;
182         u64 parent_generation;
183 };
184
185 struct btrfsic_dev_state {
186         u32 magic_num;          /* only used for debug purposes */
187         struct block_device *bdev;
188         struct btrfsic_state *state;
189         struct list_head collision_resolving_node;      /* list node */
190         struct btrfsic_block dummy_block_for_bio_bh_flush;
191         u64 last_flush_gen;
192         char name[BDEVNAME_SIZE];
193 };
194
195 struct btrfsic_block_hashtable {
196         struct list_head table[BTRFSIC_BLOCK_HASHTABLE_SIZE];
197 };
198
199 struct btrfsic_block_link_hashtable {
200         struct list_head table[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE];
201 };
202
203 struct btrfsic_dev_state_hashtable {
204         struct list_head table[BTRFSIC_DEV2STATE_HASHTABLE_SIZE];
205 };
206
207 struct btrfsic_block_data_ctx {
208         u64 start;              /* virtual bytenr */
209         u64 dev_bytenr;         /* physical bytenr on device */
210         u32 len;
211         struct btrfsic_dev_state *dev;
212         char **datav;
213         struct page **pagev;
214         void *mem_to_free;
215 };
216
217 /* This structure is used to implement recursion without occupying
218  * any stack space, refer to btrfsic_process_metablock() */
219 struct btrfsic_stack_frame {
220         u32 magic;
221         u32 nr;
222         int error;
223         int i;
224         int limit_nesting;
225         int num_copies;
226         int mirror_num;
227         struct btrfsic_block *block;
228         struct btrfsic_block_data_ctx *block_ctx;
229         struct btrfsic_block *next_block;
230         struct btrfsic_block_data_ctx next_block_ctx;
231         struct btrfs_header *hdr;
232         struct btrfsic_stack_frame *prev;
233 };
234
235 /* Some state per mounted filesystem */
236 struct btrfsic_state {
237         u32 print_mask;
238         int include_extent_data;
239         int csum_size;
240         struct list_head all_blocks_list;
241         struct btrfsic_block_hashtable block_hashtable;
242         struct btrfsic_block_link_hashtable block_link_hashtable;
243         struct btrfs_root *root;
244         u64 max_superblock_generation;
245         struct btrfsic_block *latest_superblock;
246         u32 metablock_size;
247         u32 datablock_size;
248 };
249
250 static void btrfsic_block_init(struct btrfsic_block *b);
251 static struct btrfsic_block *btrfsic_block_alloc(void);
252 static void btrfsic_block_free(struct btrfsic_block *b);
253 static void btrfsic_block_link_init(struct btrfsic_block_link *n);
254 static struct btrfsic_block_link *btrfsic_block_link_alloc(void);
255 static void btrfsic_block_link_free(struct btrfsic_block_link *n);
256 static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds);
257 static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void);
258 static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds);
259 static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h);
260 static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
261                                         struct btrfsic_block_hashtable *h);
262 static void btrfsic_block_hashtable_remove(struct btrfsic_block *b);
263 static struct btrfsic_block *btrfsic_block_hashtable_lookup(
264                 struct block_device *bdev,
265                 u64 dev_bytenr,
266                 struct btrfsic_block_hashtable *h);
267 static void btrfsic_block_link_hashtable_init(
268                 struct btrfsic_block_link_hashtable *h);
269 static void btrfsic_block_link_hashtable_add(
270                 struct btrfsic_block_link *l,
271                 struct btrfsic_block_link_hashtable *h);
272 static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l);
273 static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
274                 struct block_device *bdev_ref_to,
275                 u64 dev_bytenr_ref_to,
276                 struct block_device *bdev_ref_from,
277                 u64 dev_bytenr_ref_from,
278                 struct btrfsic_block_link_hashtable *h);
279 static void btrfsic_dev_state_hashtable_init(
280                 struct btrfsic_dev_state_hashtable *h);
281 static void btrfsic_dev_state_hashtable_add(
282                 struct btrfsic_dev_state *ds,
283                 struct btrfsic_dev_state_hashtable *h);
284 static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds);
285 static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
286                 struct block_device *bdev,
287                 struct btrfsic_dev_state_hashtable *h);
288 static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void);
289 static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf);
290 static int btrfsic_process_superblock(struct btrfsic_state *state,
291                                       struct btrfs_fs_devices *fs_devices);
292 static int btrfsic_process_metablock(struct btrfsic_state *state,
293                                      struct btrfsic_block *block,
294                                      struct btrfsic_block_data_ctx *block_ctx,
295                                      int limit_nesting, int force_iodone_flag);
296 static void btrfsic_read_from_block_data(
297         struct btrfsic_block_data_ctx *block_ctx,
298         void *dst, u32 offset, size_t len);
299 static int btrfsic_create_link_to_next_block(
300                 struct btrfsic_state *state,
301                 struct btrfsic_block *block,
302                 struct btrfsic_block_data_ctx
303                 *block_ctx, u64 next_bytenr,
304                 int limit_nesting,
305                 struct btrfsic_block_data_ctx *next_block_ctx,
306                 struct btrfsic_block **next_blockp,
307                 int force_iodone_flag,
308                 int *num_copiesp, int *mirror_nump,
309                 struct btrfs_disk_key *disk_key,
310                 u64 parent_generation);
311 static int btrfsic_handle_extent_data(struct btrfsic_state *state,
312                                       struct btrfsic_block *block,
313                                       struct btrfsic_block_data_ctx *block_ctx,
314                                       u32 item_offset, int force_iodone_flag);
315 static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
316                              struct btrfsic_block_data_ctx *block_ctx_out,
317                              int mirror_num);
318 static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
319                                   u32 len, struct block_device *bdev,
320                                   struct btrfsic_block_data_ctx *block_ctx_out);
321 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx);
322 static int btrfsic_read_block(struct btrfsic_state *state,
323                               struct btrfsic_block_data_ctx *block_ctx);
324 static void btrfsic_dump_database(struct btrfsic_state *state);
325 static void btrfsic_complete_bio_end_io(struct bio *bio, int err);
326 static int btrfsic_test_for_metadata(struct btrfsic_state *state,
327                                      char **datav, unsigned int num_pages);
328 static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
329                                           u64 dev_bytenr, char **mapped_datav,
330                                           unsigned int num_pages,
331                                           struct bio *bio, int *bio_is_patched,
332                                           struct buffer_head *bh,
333                                           int submit_bio_bh_rw);
334 static int btrfsic_process_written_superblock(
335                 struct btrfsic_state *state,
336                 struct btrfsic_block *const block,
337                 struct btrfs_super_block *const super_hdr);
338 static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status);
339 static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate);
340 static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state,
341                                               const struct btrfsic_block *block,
342                                               int recursion_level);
343 static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
344                                         struct btrfsic_block *const block,
345                                         int recursion_level);
346 static void btrfsic_print_add_link(const struct btrfsic_state *state,
347                                    const struct btrfsic_block_link *l);
348 static void btrfsic_print_rem_link(const struct btrfsic_state *state,
349                                    const struct btrfsic_block_link *l);
350 static char btrfsic_get_block_type(const struct btrfsic_state *state,
351                                    const struct btrfsic_block *block);
352 static void btrfsic_dump_tree(const struct btrfsic_state *state);
353 static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
354                                   const struct btrfsic_block *block,
355                                   int indent_level);
356 static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
357                 struct btrfsic_state *state,
358                 struct btrfsic_block_data_ctx *next_block_ctx,
359                 struct btrfsic_block *next_block,
360                 struct btrfsic_block *from_block,
361                 u64 parent_generation);
362 static struct btrfsic_block *btrfsic_block_lookup_or_add(
363                 struct btrfsic_state *state,
364                 struct btrfsic_block_data_ctx *block_ctx,
365                 const char *additional_string,
366                 int is_metadata,
367                 int is_iodone,
368                 int never_written,
369                 int mirror_num,
370                 int *was_created);
371 static int btrfsic_process_superblock_dev_mirror(
372                 struct btrfsic_state *state,
373                 struct btrfsic_dev_state *dev_state,
374                 struct btrfs_device *device,
375                 int superblock_mirror_num,
376                 struct btrfsic_dev_state **selected_dev_state,
377                 struct btrfs_super_block *selected_super);
378 static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
379                 struct block_device *bdev);
380 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
381                                            u64 bytenr,
382                                            struct btrfsic_dev_state *dev_state,
383                                            u64 dev_bytenr);
384
385 static struct mutex btrfsic_mutex;
386 static int btrfsic_is_initialized;
387 static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable;
388
389
390 static void btrfsic_block_init(struct btrfsic_block *b)
391 {
392         b->magic_num = BTRFSIC_BLOCK_MAGIC_NUMBER;
393         b->dev_state = NULL;
394         b->dev_bytenr = 0;
395         b->logical_bytenr = 0;
396         b->generation = BTRFSIC_GENERATION_UNKNOWN;
397         b->disk_key.objectid = 0;
398         b->disk_key.type = 0;
399         b->disk_key.offset = 0;
400         b->is_metadata = 0;
401         b->is_superblock = 0;
402         b->is_iodone = 0;
403         b->iodone_w_error = 0;
404         b->never_written = 0;
405         b->mirror_num = 0;
406         b->next_in_same_bio = NULL;
407         b->orig_bio_bh_private = NULL;
408         b->orig_bio_bh_end_io.bio = NULL;
409         INIT_LIST_HEAD(&b->collision_resolving_node);
410         INIT_LIST_HEAD(&b->all_blocks_node);
411         INIT_LIST_HEAD(&b->ref_to_list);
412         INIT_LIST_HEAD(&b->ref_from_list);
413         b->submit_bio_bh_rw = 0;
414         b->flush_gen = 0;
415 }
416
417 static struct btrfsic_block *btrfsic_block_alloc(void)
418 {
419         struct btrfsic_block *b;
420
421         b = kzalloc(sizeof(*b), GFP_NOFS);
422         if (NULL != b)
423                 btrfsic_block_init(b);
424
425         return b;
426 }
427
428 static void btrfsic_block_free(struct btrfsic_block *b)
429 {
430         BUG_ON(!(NULL == b || BTRFSIC_BLOCK_MAGIC_NUMBER == b->magic_num));
431         kfree(b);
432 }
433
434 static void btrfsic_block_link_init(struct btrfsic_block_link *l)
435 {
436         l->magic_num = BTRFSIC_BLOCK_LINK_MAGIC_NUMBER;
437         l->ref_cnt = 1;
438         INIT_LIST_HEAD(&l->node_ref_to);
439         INIT_LIST_HEAD(&l->node_ref_from);
440         INIT_LIST_HEAD(&l->collision_resolving_node);
441         l->block_ref_to = NULL;
442         l->block_ref_from = NULL;
443 }
444
445 static struct btrfsic_block_link *btrfsic_block_link_alloc(void)
446 {
447         struct btrfsic_block_link *l;
448
449         l = kzalloc(sizeof(*l), GFP_NOFS);
450         if (NULL != l)
451                 btrfsic_block_link_init(l);
452
453         return l;
454 }
455
456 static void btrfsic_block_link_free(struct btrfsic_block_link *l)
457 {
458         BUG_ON(!(NULL == l || BTRFSIC_BLOCK_LINK_MAGIC_NUMBER == l->magic_num));
459         kfree(l);
460 }
461
462 static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds)
463 {
464         ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER;
465         ds->bdev = NULL;
466         ds->state = NULL;
467         ds->name[0] = '\0';
468         INIT_LIST_HEAD(&ds->collision_resolving_node);
469         ds->last_flush_gen = 0;
470         btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush);
471         ds->dummy_block_for_bio_bh_flush.is_iodone = 1;
472         ds->dummy_block_for_bio_bh_flush.dev_state = ds;
473 }
474
475 static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void)
476 {
477         struct btrfsic_dev_state *ds;
478
479         ds = kzalloc(sizeof(*ds), GFP_NOFS);
480         if (NULL != ds)
481                 btrfsic_dev_state_init(ds);
482
483         return ds;
484 }
485
486 static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds)
487 {
488         BUG_ON(!(NULL == ds ||
489                  BTRFSIC_DEV2STATE_MAGIC_NUMBER == ds->magic_num));
490         kfree(ds);
491 }
492
493 static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h)
494 {
495         int i;
496
497         for (i = 0; i < BTRFSIC_BLOCK_HASHTABLE_SIZE; i++)
498                 INIT_LIST_HEAD(h->table + i);
499 }
500
501 static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
502                                         struct btrfsic_block_hashtable *h)
503 {
504         const unsigned int hashval =
505             (((unsigned int)(b->dev_bytenr >> 16)) ^
506              ((unsigned int)((uintptr_t)b->dev_state->bdev))) &
507              (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
508
509         list_add(&b->collision_resolving_node, h->table + hashval);
510 }
511
512 static void btrfsic_block_hashtable_remove(struct btrfsic_block *b)
513 {
514         list_del(&b->collision_resolving_node);
515 }
516
517 static struct btrfsic_block *btrfsic_block_hashtable_lookup(
518                 struct block_device *bdev,
519                 u64 dev_bytenr,
520                 struct btrfsic_block_hashtable *h)
521 {
522         const unsigned int hashval =
523             (((unsigned int)(dev_bytenr >> 16)) ^
524              ((unsigned int)((uintptr_t)bdev))) &
525              (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
526         struct list_head *elem;
527
528         list_for_each(elem, h->table + hashval) {
529                 struct btrfsic_block *const b =
530                     list_entry(elem, struct btrfsic_block,
531                                collision_resolving_node);
532
533                 if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr)
534                         return b;
535         }
536
537         return NULL;
538 }
539
540 static void btrfsic_block_link_hashtable_init(
541                 struct btrfsic_block_link_hashtable *h)
542 {
543         int i;
544
545         for (i = 0; i < BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE; i++)
546                 INIT_LIST_HEAD(h->table + i);
547 }
548
549 static void btrfsic_block_link_hashtable_add(
550                 struct btrfsic_block_link *l,
551                 struct btrfsic_block_link_hashtable *h)
552 {
553         const unsigned int hashval =
554             (((unsigned int)(l->block_ref_to->dev_bytenr >> 16)) ^
555              ((unsigned int)(l->block_ref_from->dev_bytenr >> 16)) ^
556              ((unsigned int)((uintptr_t)l->block_ref_to->dev_state->bdev)) ^
557              ((unsigned int)((uintptr_t)l->block_ref_from->dev_state->bdev)))
558              & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
559
560         BUG_ON(NULL == l->block_ref_to);
561         BUG_ON(NULL == l->block_ref_from);
562         list_add(&l->collision_resolving_node, h->table + hashval);
563 }
564
565 static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l)
566 {
567         list_del(&l->collision_resolving_node);
568 }
569
570 static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
571                 struct block_device *bdev_ref_to,
572                 u64 dev_bytenr_ref_to,
573                 struct block_device *bdev_ref_from,
574                 u64 dev_bytenr_ref_from,
575                 struct btrfsic_block_link_hashtable *h)
576 {
577         const unsigned int hashval =
578             (((unsigned int)(dev_bytenr_ref_to >> 16)) ^
579              ((unsigned int)(dev_bytenr_ref_from >> 16)) ^
580              ((unsigned int)((uintptr_t)bdev_ref_to)) ^
581              ((unsigned int)((uintptr_t)bdev_ref_from))) &
582              (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
583         struct list_head *elem;
584
585         list_for_each(elem, h->table + hashval) {
586                 struct btrfsic_block_link *const l =
587                     list_entry(elem, struct btrfsic_block_link,
588                                collision_resolving_node);
589
590                 BUG_ON(NULL == l->block_ref_to);
591                 BUG_ON(NULL == l->block_ref_from);
592                 if (l->block_ref_to->dev_state->bdev == bdev_ref_to &&
593                     l->block_ref_to->dev_bytenr == dev_bytenr_ref_to &&
594                     l->block_ref_from->dev_state->bdev == bdev_ref_from &&
595                     l->block_ref_from->dev_bytenr == dev_bytenr_ref_from)
596                         return l;
597         }
598
599         return NULL;
600 }
601
602 static void btrfsic_dev_state_hashtable_init(
603                 struct btrfsic_dev_state_hashtable *h)
604 {
605         int i;
606
607         for (i = 0; i < BTRFSIC_DEV2STATE_HASHTABLE_SIZE; i++)
608                 INIT_LIST_HEAD(h->table + i);
609 }
610
611 static void btrfsic_dev_state_hashtable_add(
612                 struct btrfsic_dev_state *ds,
613                 struct btrfsic_dev_state_hashtable *h)
614 {
615         const unsigned int hashval =
616             (((unsigned int)((uintptr_t)ds->bdev)) &
617              (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
618
619         list_add(&ds->collision_resolving_node, h->table + hashval);
620 }
621
622 static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds)
623 {
624         list_del(&ds->collision_resolving_node);
625 }
626
627 static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
628                 struct block_device *bdev,
629                 struct btrfsic_dev_state_hashtable *h)
630 {
631         const unsigned int hashval =
632             (((unsigned int)((uintptr_t)bdev)) &
633              (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
634         struct list_head *elem;
635
636         list_for_each(elem, h->table + hashval) {
637                 struct btrfsic_dev_state *const ds =
638                     list_entry(elem, struct btrfsic_dev_state,
639                                collision_resolving_node);
640
641                 if (ds->bdev == bdev)
642                         return ds;
643         }
644
645         return NULL;
646 }
647
648 static int btrfsic_process_superblock(struct btrfsic_state *state,
649                                       struct btrfs_fs_devices *fs_devices)
650 {
651         int ret = 0;
652         struct btrfs_super_block *selected_super;
653         struct list_head *dev_head = &fs_devices->devices;
654         struct btrfs_device *device;
655         struct btrfsic_dev_state *selected_dev_state = NULL;
656         int pass;
657
658         BUG_ON(NULL == state);
659         selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS);
660         if (NULL == selected_super) {
661                 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
662                 return -1;
663         }
664
665         list_for_each_entry(device, dev_head, dev_list) {
666                 int i;
667                 struct btrfsic_dev_state *dev_state;
668
669                 if (!device->bdev || !device->name)
670                         continue;
671
672                 dev_state = btrfsic_dev_state_lookup(device->bdev);
673                 BUG_ON(NULL == dev_state);
674                 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
675                         ret = btrfsic_process_superblock_dev_mirror(
676                                         state, dev_state, device, i,
677                                         &selected_dev_state, selected_super);
678                         if (0 != ret && 0 == i) {
679                                 kfree(selected_super);
680                                 return ret;
681                         }
682                 }
683         }
684
685         if (NULL == state->latest_superblock) {
686                 printk(KERN_INFO "btrfsic: no superblock found!\n");
687                 kfree(selected_super);
688                 return -1;
689         }
690
691         state->csum_size = btrfs_super_csum_size(selected_super);
692
693         for (pass = 0; pass < 3; pass++) {
694                 int num_copies;
695                 int mirror_num;
696                 u64 next_bytenr;
697
698                 switch (pass) {
699                 case 0:
700                         next_bytenr = btrfs_super_root(selected_super);
701                         if (state->print_mask &
702                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
703                                 printk(KERN_INFO "root@%llu\n",
704                                        (unsigned long long)next_bytenr);
705                         break;
706                 case 1:
707                         next_bytenr = btrfs_super_chunk_root(selected_super);
708                         if (state->print_mask &
709                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
710                                 printk(KERN_INFO "chunk@%llu\n",
711                                        (unsigned long long)next_bytenr);
712                         break;
713                 case 2:
714                         next_bytenr = btrfs_super_log_root(selected_super);
715                         if (0 == next_bytenr)
716                                 continue;
717                         if (state->print_mask &
718                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
719                                 printk(KERN_INFO "log@%llu\n",
720                                        (unsigned long long)next_bytenr);
721                         break;
722                 }
723
724                 num_copies =
725                     btrfs_num_copies(&state->root->fs_info->mapping_tree,
726                                      next_bytenr, state->metablock_size);
727                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
728                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
729                                (unsigned long long)next_bytenr, num_copies);
730
731                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
732                         struct btrfsic_block *next_block;
733                         struct btrfsic_block_data_ctx tmp_next_block_ctx;
734                         struct btrfsic_block_link *l;
735
736                         ret = btrfsic_map_block(state, next_bytenr,
737                                                 state->metablock_size,
738                                                 &tmp_next_block_ctx,
739                                                 mirror_num);
740                         if (ret) {
741                                 printk(KERN_INFO "btrfsic:"
742                                        " btrfsic_map_block(root @%llu,"
743                                        " mirror %d) failed!\n",
744                                        (unsigned long long)next_bytenr,
745                                        mirror_num);
746                                 kfree(selected_super);
747                                 return -1;
748                         }
749
750                         next_block = btrfsic_block_hashtable_lookup(
751                                         tmp_next_block_ctx.dev->bdev,
752                                         tmp_next_block_ctx.dev_bytenr,
753                                         &state->block_hashtable);
754                         BUG_ON(NULL == next_block);
755
756                         l = btrfsic_block_link_hashtable_lookup(
757                                         tmp_next_block_ctx.dev->bdev,
758                                         tmp_next_block_ctx.dev_bytenr,
759                                         state->latest_superblock->dev_state->
760                                         bdev,
761                                         state->latest_superblock->dev_bytenr,
762                                         &state->block_link_hashtable);
763                         BUG_ON(NULL == l);
764
765                         ret = btrfsic_read_block(state, &tmp_next_block_ctx);
766                         if (ret < (int)PAGE_CACHE_SIZE) {
767                                 printk(KERN_INFO
768                                        "btrfsic: read @logical %llu failed!\n",
769                                        (unsigned long long)
770                                        tmp_next_block_ctx.start);
771                                 btrfsic_release_block_ctx(&tmp_next_block_ctx);
772                                 kfree(selected_super);
773                                 return -1;
774                         }
775
776                         ret = btrfsic_process_metablock(state,
777                                                         next_block,
778                                                         &tmp_next_block_ctx,
779                                                         BTRFS_MAX_LEVEL + 3, 1);
780                         btrfsic_release_block_ctx(&tmp_next_block_ctx);
781                 }
782         }
783
784         kfree(selected_super);
785         return ret;
786 }
787
788 static int btrfsic_process_superblock_dev_mirror(
789                 struct btrfsic_state *state,
790                 struct btrfsic_dev_state *dev_state,
791                 struct btrfs_device *device,
792                 int superblock_mirror_num,
793                 struct btrfsic_dev_state **selected_dev_state,
794                 struct btrfs_super_block *selected_super)
795 {
796         struct btrfs_super_block *super_tmp;
797         u64 dev_bytenr;
798         struct buffer_head *bh;
799         struct btrfsic_block *superblock_tmp;
800         int pass;
801         struct block_device *const superblock_bdev = device->bdev;
802
803         /* super block bytenr is always the unmapped device bytenr */
804         dev_bytenr = btrfs_sb_offset(superblock_mirror_num);
805         if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes)
806                 return -1;
807         bh = __bread(superblock_bdev, dev_bytenr / 4096,
808                      BTRFS_SUPER_INFO_SIZE);
809         if (NULL == bh)
810                 return -1;
811         super_tmp = (struct btrfs_super_block *)
812             (bh->b_data + (dev_bytenr & 4095));
813
814         if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
815             strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC,
816                     sizeof(super_tmp->magic)) ||
817             memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
818             btrfs_super_nodesize(super_tmp) != state->metablock_size ||
819             btrfs_super_leafsize(super_tmp) != state->metablock_size ||
820             btrfs_super_sectorsize(super_tmp) != state->datablock_size) {
821                 brelse(bh);
822                 return 0;
823         }
824
825         superblock_tmp =
826             btrfsic_block_hashtable_lookup(superblock_bdev,
827                                            dev_bytenr,
828                                            &state->block_hashtable);
829         if (NULL == superblock_tmp) {
830                 superblock_tmp = btrfsic_block_alloc();
831                 if (NULL == superblock_tmp) {
832                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
833                         brelse(bh);
834                         return -1;
835                 }
836                 /* for superblock, only the dev_bytenr makes sense */
837                 superblock_tmp->dev_bytenr = dev_bytenr;
838                 superblock_tmp->dev_state = dev_state;
839                 superblock_tmp->logical_bytenr = dev_bytenr;
840                 superblock_tmp->generation = btrfs_super_generation(super_tmp);
841                 superblock_tmp->is_metadata = 1;
842                 superblock_tmp->is_superblock = 1;
843                 superblock_tmp->is_iodone = 1;
844                 superblock_tmp->never_written = 0;
845                 superblock_tmp->mirror_num = 1 + superblock_mirror_num;
846                 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
847                         printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)"
848                                      " @%llu (%s/%llu/%d)\n",
849                                      superblock_bdev,
850                                      rcu_str_deref(device->name),
851                                      (unsigned long long)dev_bytenr,
852                                      dev_state->name,
853                                      (unsigned long long)dev_bytenr,
854                                      superblock_mirror_num);
855                 list_add(&superblock_tmp->all_blocks_node,
856                          &state->all_blocks_list);
857                 btrfsic_block_hashtable_add(superblock_tmp,
858                                             &state->block_hashtable);
859         }
860
861         /* select the one with the highest generation field */
862         if (btrfs_super_generation(super_tmp) >
863             state->max_superblock_generation ||
864             0 == state->max_superblock_generation) {
865                 memcpy(selected_super, super_tmp, sizeof(*selected_super));
866                 *selected_dev_state = dev_state;
867                 state->max_superblock_generation =
868                     btrfs_super_generation(super_tmp);
869                 state->latest_superblock = superblock_tmp;
870         }
871
872         for (pass = 0; pass < 3; pass++) {
873                 u64 next_bytenr;
874                 int num_copies;
875                 int mirror_num;
876                 const char *additional_string = NULL;
877                 struct btrfs_disk_key tmp_disk_key;
878
879                 tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
880                 tmp_disk_key.offset = 0;
881                 switch (pass) {
882                 case 0:
883                         tmp_disk_key.objectid =
884                             cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID);
885                         additional_string = "initial root ";
886                         next_bytenr = btrfs_super_root(super_tmp);
887                         break;
888                 case 1:
889                         tmp_disk_key.objectid =
890                             cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID);
891                         additional_string = "initial chunk ";
892                         next_bytenr = btrfs_super_chunk_root(super_tmp);
893                         break;
894                 case 2:
895                         tmp_disk_key.objectid =
896                             cpu_to_le64(BTRFS_TREE_LOG_OBJECTID);
897                         additional_string = "initial log ";
898                         next_bytenr = btrfs_super_log_root(super_tmp);
899                         if (0 == next_bytenr)
900                                 continue;
901                         break;
902                 }
903
904                 num_copies =
905                     btrfs_num_copies(&state->root->fs_info->mapping_tree,
906                                      next_bytenr, state->metablock_size);
907                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
908                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
909                                (unsigned long long)next_bytenr, num_copies);
910                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
911                         struct btrfsic_block *next_block;
912                         struct btrfsic_block_data_ctx tmp_next_block_ctx;
913                         struct btrfsic_block_link *l;
914
915                         if (btrfsic_map_block(state, next_bytenr,
916                                               state->metablock_size,
917                                               &tmp_next_block_ctx,
918                                               mirror_num)) {
919                                 printk(KERN_INFO "btrfsic: btrfsic_map_block("
920                                        "bytenr @%llu, mirror %d) failed!\n",
921                                        (unsigned long long)next_bytenr,
922                                        mirror_num);
923                                 brelse(bh);
924                                 return -1;
925                         }
926
927                         next_block = btrfsic_block_lookup_or_add(
928                                         state, &tmp_next_block_ctx,
929                                         additional_string, 1, 1, 0,
930                                         mirror_num, NULL);
931                         if (NULL == next_block) {
932                                 btrfsic_release_block_ctx(&tmp_next_block_ctx);
933                                 brelse(bh);
934                                 return -1;
935                         }
936
937                         next_block->disk_key = tmp_disk_key;
938                         next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
939                         l = btrfsic_block_link_lookup_or_add(
940                                         state, &tmp_next_block_ctx,
941                                         next_block, superblock_tmp,
942                                         BTRFSIC_GENERATION_UNKNOWN);
943                         btrfsic_release_block_ctx(&tmp_next_block_ctx);
944                         if (NULL == l) {
945                                 brelse(bh);
946                                 return -1;
947                         }
948                 }
949         }
950         if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES)
951                 btrfsic_dump_tree_sub(state, superblock_tmp, 0);
952
953         brelse(bh);
954         return 0;
955 }
956
957 static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void)
958 {
959         struct btrfsic_stack_frame *sf;
960
961         sf = kzalloc(sizeof(*sf), GFP_NOFS);
962         if (NULL == sf)
963                 printk(KERN_INFO "btrfsic: alloc memory failed!\n");
964         else
965                 sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER;
966         return sf;
967 }
968
969 static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf)
970 {
971         BUG_ON(!(NULL == sf ||
972                  BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER == sf->magic));
973         kfree(sf);
974 }
975
976 static int btrfsic_process_metablock(
977                 struct btrfsic_state *state,
978                 struct btrfsic_block *const first_block,
979                 struct btrfsic_block_data_ctx *const first_block_ctx,
980                 int first_limit_nesting, int force_iodone_flag)
981 {
982         struct btrfsic_stack_frame initial_stack_frame = { 0 };
983         struct btrfsic_stack_frame *sf;
984         struct btrfsic_stack_frame *next_stack;
985         struct btrfs_header *const first_hdr =
986                 (struct btrfs_header *)first_block_ctx->datav[0];
987
988         BUG_ON(!first_hdr);
989         sf = &initial_stack_frame;
990         sf->error = 0;
991         sf->i = -1;
992         sf->limit_nesting = first_limit_nesting;
993         sf->block = first_block;
994         sf->block_ctx = first_block_ctx;
995         sf->next_block = NULL;
996         sf->hdr = first_hdr;
997         sf->prev = NULL;
998
999 continue_with_new_stack_frame:
1000         sf->block->generation = le64_to_cpu(sf->hdr->generation);
1001         if (0 == sf->hdr->level) {
1002                 struct btrfs_leaf *const leafhdr =
1003                     (struct btrfs_leaf *)sf->hdr;
1004
1005                 if (-1 == sf->i) {
1006                         sf->nr = le32_to_cpu(leafhdr->header.nritems);
1007
1008                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1009                                 printk(KERN_INFO
1010                                        "leaf %llu items %d generation %llu"
1011                                        " owner %llu\n",
1012                                        (unsigned long long)
1013                                        sf->block_ctx->start,
1014                                        sf->nr,
1015                                        (unsigned long long)
1016                                        le64_to_cpu(leafhdr->header.generation),
1017                                        (unsigned long long)
1018                                        le64_to_cpu(leafhdr->header.owner));
1019                 }
1020
1021 continue_with_current_leaf_stack_frame:
1022                 if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
1023                         sf->i++;
1024                         sf->num_copies = 0;
1025                 }
1026
1027                 if (sf->i < sf->nr) {
1028                         struct btrfs_item disk_item;
1029                         u32 disk_item_offset =
1030                                 (uintptr_t)(leafhdr->items + sf->i) -
1031                                 (uintptr_t)leafhdr;
1032                         struct btrfs_disk_key *disk_key;
1033                         u8 type;
1034                         u32 item_offset;
1035                         u32 item_size;
1036
1037                         if (disk_item_offset + sizeof(struct btrfs_item) >
1038                             sf->block_ctx->len) {
1039 leaf_item_out_of_bounce_error:
1040                                 printk(KERN_INFO
1041                                        "btrfsic: leaf item out of bounce at logical %llu, dev %s\n",
1042                                        sf->block_ctx->start,
1043                                        sf->block_ctx->dev->name);
1044                                 goto one_stack_frame_backwards;
1045                         }
1046                         btrfsic_read_from_block_data(sf->block_ctx,
1047                                                      &disk_item,
1048                                                      disk_item_offset,
1049                                                      sizeof(struct btrfs_item));
1050                         item_offset = le32_to_cpu(disk_item.offset);
1051                         item_size = le32_to_cpu(disk_item.size);
1052                         disk_key = &disk_item.key;
1053                         type = disk_key->type;
1054
1055                         if (BTRFS_ROOT_ITEM_KEY == type) {
1056                                 struct btrfs_root_item root_item;
1057                                 u32 root_item_offset;
1058                                 u64 next_bytenr;
1059
1060                                 root_item_offset = item_offset +
1061                                         offsetof(struct btrfs_leaf, items);
1062                                 if (root_item_offset + item_size >
1063                                     sf->block_ctx->len)
1064                                         goto leaf_item_out_of_bounce_error;
1065                                 btrfsic_read_from_block_data(
1066                                         sf->block_ctx, &root_item,
1067                                         root_item_offset,
1068                                         item_size);
1069                                 next_bytenr = le64_to_cpu(root_item.bytenr);
1070
1071                                 sf->error =
1072                                     btrfsic_create_link_to_next_block(
1073                                                 state,
1074                                                 sf->block,
1075                                                 sf->block_ctx,
1076                                                 next_bytenr,
1077                                                 sf->limit_nesting,
1078                                                 &sf->next_block_ctx,
1079                                                 &sf->next_block,
1080                                                 force_iodone_flag,
1081                                                 &sf->num_copies,
1082                                                 &sf->mirror_num,
1083                                                 disk_key,
1084                                                 le64_to_cpu(root_item.
1085                                                 generation));
1086                                 if (sf->error)
1087                                         goto one_stack_frame_backwards;
1088
1089                                 if (NULL != sf->next_block) {
1090                                         struct btrfs_header *const next_hdr =
1091                                             (struct btrfs_header *)
1092                                             sf->next_block_ctx.datav[0];
1093
1094                                         next_stack =
1095                                             btrfsic_stack_frame_alloc();
1096                                         if (NULL == next_stack) {
1097                                                 btrfsic_release_block_ctx(
1098                                                                 &sf->
1099                                                                 next_block_ctx);
1100                                                 goto one_stack_frame_backwards;
1101                                         }
1102
1103                                         next_stack->i = -1;
1104                                         next_stack->block = sf->next_block;
1105                                         next_stack->block_ctx =
1106                                             &sf->next_block_ctx;
1107                                         next_stack->next_block = NULL;
1108                                         next_stack->hdr = next_hdr;
1109                                         next_stack->limit_nesting =
1110                                             sf->limit_nesting - 1;
1111                                         next_stack->prev = sf;
1112                                         sf = next_stack;
1113                                         goto continue_with_new_stack_frame;
1114                                 }
1115                         } else if (BTRFS_EXTENT_DATA_KEY == type &&
1116                                    state->include_extent_data) {
1117                                 sf->error = btrfsic_handle_extent_data(
1118                                                 state,
1119                                                 sf->block,
1120                                                 sf->block_ctx,
1121                                                 item_offset,
1122                                                 force_iodone_flag);
1123                                 if (sf->error)
1124                                         goto one_stack_frame_backwards;
1125                         }
1126
1127                         goto continue_with_current_leaf_stack_frame;
1128                 }
1129         } else {
1130                 struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr;
1131
1132                 if (-1 == sf->i) {
1133                         sf->nr = le32_to_cpu(nodehdr->header.nritems);
1134
1135                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1136                                 printk(KERN_INFO "node %llu level %d items %d"
1137                                        " generation %llu owner %llu\n",
1138                                        (unsigned long long)
1139                                        sf->block_ctx->start,
1140                                        nodehdr->header.level, sf->nr,
1141                                        (unsigned long long)
1142                                        le64_to_cpu(nodehdr->header.generation),
1143                                        (unsigned long long)
1144                                        le64_to_cpu(nodehdr->header.owner));
1145                 }
1146
1147 continue_with_current_node_stack_frame:
1148                 if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
1149                         sf->i++;
1150                         sf->num_copies = 0;
1151                 }
1152
1153                 if (sf->i < sf->nr) {
1154                         struct btrfs_key_ptr key_ptr;
1155                         u32 key_ptr_offset;
1156                         u64 next_bytenr;
1157
1158                         key_ptr_offset = (uintptr_t)(nodehdr->ptrs + sf->i) -
1159                                           (uintptr_t)nodehdr;
1160                         if (key_ptr_offset + sizeof(struct btrfs_key_ptr) >
1161                             sf->block_ctx->len) {
1162                                 printk(KERN_INFO
1163                                        "btrfsic: node item out of bounce at logical %llu, dev %s\n",
1164                                        sf->block_ctx->start,
1165                                        sf->block_ctx->dev->name);
1166                                 goto one_stack_frame_backwards;
1167                         }
1168                         btrfsic_read_from_block_data(
1169                                 sf->block_ctx, &key_ptr, key_ptr_offset,
1170                                 sizeof(struct btrfs_key_ptr));
1171                         next_bytenr = le64_to_cpu(key_ptr.blockptr);
1172
1173                         sf->error = btrfsic_create_link_to_next_block(
1174                                         state,
1175                                         sf->block,
1176                                         sf->block_ctx,
1177                                         next_bytenr,
1178                                         sf->limit_nesting,
1179                                         &sf->next_block_ctx,
1180                                         &sf->next_block,
1181                                         force_iodone_flag,
1182                                         &sf->num_copies,
1183                                         &sf->mirror_num,
1184                                         &key_ptr.key,
1185                                         le64_to_cpu(key_ptr.generation));
1186                         if (sf->error)
1187                                 goto one_stack_frame_backwards;
1188
1189                         if (NULL != sf->next_block) {
1190                                 struct btrfs_header *const next_hdr =
1191                                     (struct btrfs_header *)
1192                                     sf->next_block_ctx.datav[0];
1193
1194                                 next_stack = btrfsic_stack_frame_alloc();
1195                                 if (NULL == next_stack)
1196                                         goto one_stack_frame_backwards;
1197
1198                                 next_stack->i = -1;
1199                                 next_stack->block = sf->next_block;
1200                                 next_stack->block_ctx = &sf->next_block_ctx;
1201                                 next_stack->next_block = NULL;
1202                                 next_stack->hdr = next_hdr;
1203                                 next_stack->limit_nesting =
1204                                     sf->limit_nesting - 1;
1205                                 next_stack->prev = sf;
1206                                 sf = next_stack;
1207                                 goto continue_with_new_stack_frame;
1208                         }
1209
1210                         goto continue_with_current_node_stack_frame;
1211                 }
1212         }
1213
1214 one_stack_frame_backwards:
1215         if (NULL != sf->prev) {
1216                 struct btrfsic_stack_frame *const prev = sf->prev;
1217
1218                 /* the one for the initial block is freed in the caller */
1219                 btrfsic_release_block_ctx(sf->block_ctx);
1220
1221                 if (sf->error) {
1222                         prev->error = sf->error;
1223                         btrfsic_stack_frame_free(sf);
1224                         sf = prev;
1225                         goto one_stack_frame_backwards;
1226                 }
1227
1228                 btrfsic_stack_frame_free(sf);
1229                 sf = prev;
1230                 goto continue_with_new_stack_frame;
1231         } else {
1232                 BUG_ON(&initial_stack_frame != sf);
1233         }
1234
1235         return sf->error;
1236 }
1237
1238 static void btrfsic_read_from_block_data(
1239         struct btrfsic_block_data_ctx *block_ctx,
1240         void *dstv, u32 offset, size_t len)
1241 {
1242         size_t cur;
1243         size_t offset_in_page;
1244         char *kaddr;
1245         char *dst = (char *)dstv;
1246         size_t start_offset = block_ctx->start & ((u64)PAGE_CACHE_SIZE - 1);
1247         unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT;
1248
1249         WARN_ON(offset + len > block_ctx->len);
1250         offset_in_page = (start_offset + offset) &
1251                          ((unsigned long)PAGE_CACHE_SIZE - 1);
1252
1253         while (len > 0) {
1254                 cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page));
1255                 BUG_ON(i >= (block_ctx->len + PAGE_CACHE_SIZE - 1) >>
1256                             PAGE_CACHE_SHIFT);
1257                 kaddr = block_ctx->datav[i];
1258                 memcpy(dst, kaddr + offset_in_page, cur);
1259
1260                 dst += cur;
1261                 len -= cur;
1262                 offset_in_page = 0;
1263                 i++;
1264         }
1265 }
1266
1267 static int btrfsic_create_link_to_next_block(
1268                 struct btrfsic_state *state,
1269                 struct btrfsic_block *block,
1270                 struct btrfsic_block_data_ctx *block_ctx,
1271                 u64 next_bytenr,
1272                 int limit_nesting,
1273                 struct btrfsic_block_data_ctx *next_block_ctx,
1274                 struct btrfsic_block **next_blockp,
1275                 int force_iodone_flag,
1276                 int *num_copiesp, int *mirror_nump,
1277                 struct btrfs_disk_key *disk_key,
1278                 u64 parent_generation)
1279 {
1280         struct btrfsic_block *next_block = NULL;
1281         int ret;
1282         struct btrfsic_block_link *l;
1283         int did_alloc_block_link;
1284         int block_was_created;
1285
1286         *next_blockp = NULL;
1287         if (0 == *num_copiesp) {
1288                 *num_copiesp =
1289                     btrfs_num_copies(&state->root->fs_info->mapping_tree,
1290                                      next_bytenr, state->metablock_size);
1291                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1292                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
1293                                (unsigned long long)next_bytenr, *num_copiesp);
1294                 *mirror_nump = 1;
1295         }
1296
1297         if (*mirror_nump > *num_copiesp)
1298                 return 0;
1299
1300         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1301                 printk(KERN_INFO
1302                        "btrfsic_create_link_to_next_block(mirror_num=%d)\n",
1303                        *mirror_nump);
1304         ret = btrfsic_map_block(state, next_bytenr,
1305                                 state->metablock_size,
1306                                 next_block_ctx, *mirror_nump);
1307         if (ret) {
1308                 printk(KERN_INFO
1309                        "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
1310                        (unsigned long long)next_bytenr, *mirror_nump);
1311                 btrfsic_release_block_ctx(next_block_ctx);
1312                 *next_blockp = NULL;
1313                 return -1;
1314         }
1315
1316         next_block = btrfsic_block_lookup_or_add(state,
1317                                                  next_block_ctx, "referenced ",
1318                                                  1, force_iodone_flag,
1319                                                  !force_iodone_flag,
1320                                                  *mirror_nump,
1321                                                  &block_was_created);
1322         if (NULL == next_block) {
1323                 btrfsic_release_block_ctx(next_block_ctx);
1324                 *next_blockp = NULL;
1325                 return -1;
1326         }
1327         if (block_was_created) {
1328                 l = NULL;
1329                 next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
1330         } else {
1331                 if (next_block->logical_bytenr != next_bytenr &&
1332                     !(!next_block->is_metadata &&
1333                       0 == next_block->logical_bytenr)) {
1334                         printk(KERN_INFO
1335                                "Referenced block @%llu (%s/%llu/%d)"
1336                                " found in hash table, %c,"
1337                                " bytenr mismatch (!= stored %llu).\n",
1338                                (unsigned long long)next_bytenr,
1339                                next_block_ctx->dev->name,
1340                                (unsigned long long)next_block_ctx->dev_bytenr,
1341                                *mirror_nump,
1342                                btrfsic_get_block_type(state, next_block),
1343                                (unsigned long long)next_block->logical_bytenr);
1344                 } else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1345                         printk(KERN_INFO
1346                                "Referenced block @%llu (%s/%llu/%d)"
1347                                " found in hash table, %c.\n",
1348                                (unsigned long long)next_bytenr,
1349                                next_block_ctx->dev->name,
1350                                (unsigned long long)next_block_ctx->dev_bytenr,
1351                                *mirror_nump,
1352                                btrfsic_get_block_type(state, next_block));
1353                 next_block->logical_bytenr = next_bytenr;
1354
1355                 next_block->mirror_num = *mirror_nump;
1356                 l = btrfsic_block_link_hashtable_lookup(
1357                                 next_block_ctx->dev->bdev,
1358                                 next_block_ctx->dev_bytenr,
1359                                 block_ctx->dev->bdev,
1360                                 block_ctx->dev_bytenr,
1361                                 &state->block_link_hashtable);
1362         }
1363
1364         next_block->disk_key = *disk_key;
1365         if (NULL == l) {
1366                 l = btrfsic_block_link_alloc();
1367                 if (NULL == l) {
1368                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
1369                         btrfsic_release_block_ctx(next_block_ctx);
1370                         *next_blockp = NULL;
1371                         return -1;
1372                 }
1373
1374                 did_alloc_block_link = 1;
1375                 l->block_ref_to = next_block;
1376                 l->block_ref_from = block;
1377                 l->ref_cnt = 1;
1378                 l->parent_generation = parent_generation;
1379
1380                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1381                         btrfsic_print_add_link(state, l);
1382
1383                 list_add(&l->node_ref_to, &block->ref_to_list);
1384                 list_add(&l->node_ref_from, &next_block->ref_from_list);
1385
1386                 btrfsic_block_link_hashtable_add(l,
1387                                                  &state->block_link_hashtable);
1388         } else {
1389                 did_alloc_block_link = 0;
1390                 if (0 == limit_nesting) {
1391                         l->ref_cnt++;
1392                         l->parent_generation = parent_generation;
1393                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1394                                 btrfsic_print_add_link(state, l);
1395                 }
1396         }
1397
1398         if (limit_nesting > 0 && did_alloc_block_link) {
1399                 ret = btrfsic_read_block(state, next_block_ctx);
1400                 if (ret < (int)next_block_ctx->len) {
1401                         printk(KERN_INFO
1402                                "btrfsic: read block @logical %llu failed!\n",
1403                                (unsigned long long)next_bytenr);
1404                         btrfsic_release_block_ctx(next_block_ctx);
1405                         *next_blockp = NULL;
1406                         return -1;
1407                 }
1408
1409                 *next_blockp = next_block;
1410         } else {
1411                 *next_blockp = NULL;
1412         }
1413         (*mirror_nump)++;
1414
1415         return 0;
1416 }
1417
1418 static int btrfsic_handle_extent_data(
1419                 struct btrfsic_state *state,
1420                 struct btrfsic_block *block,
1421                 struct btrfsic_block_data_ctx *block_ctx,
1422                 u32 item_offset, int force_iodone_flag)
1423 {
1424         int ret;
1425         struct btrfs_file_extent_item file_extent_item;
1426         u64 file_extent_item_offset;
1427         u64 next_bytenr;
1428         u64 num_bytes;
1429         u64 generation;
1430         struct btrfsic_block_link *l;
1431
1432         file_extent_item_offset = offsetof(struct btrfs_leaf, items) +
1433                                   item_offset;
1434         if (file_extent_item_offset +
1435             offsetof(struct btrfs_file_extent_item, disk_num_bytes) >
1436             block_ctx->len) {
1437                 printk(KERN_INFO
1438                        "btrfsic: file item out of bounce at logical %llu, dev %s\n",
1439                        block_ctx->start, block_ctx->dev->name);
1440                 return -1;
1441         }
1442
1443         btrfsic_read_from_block_data(block_ctx, &file_extent_item,
1444                 file_extent_item_offset,
1445                 offsetof(struct btrfs_file_extent_item, disk_num_bytes));
1446         if (BTRFS_FILE_EXTENT_REG != file_extent_item.type ||
1447             ((u64)0) == le64_to_cpu(file_extent_item.disk_bytenr)) {
1448                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1449                         printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n",
1450                                file_extent_item.type,
1451                                (unsigned long long)
1452                                le64_to_cpu(file_extent_item.disk_bytenr));
1453                 return 0;
1454         }
1455
1456         if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) >
1457             block_ctx->len) {
1458                 printk(KERN_INFO
1459                        "btrfsic: file item out of bounce at logical %llu, dev %s\n",
1460                        block_ctx->start, block_ctx->dev->name);
1461                 return -1;
1462         }
1463         btrfsic_read_from_block_data(block_ctx, &file_extent_item,
1464                                      file_extent_item_offset,
1465                                      sizeof(struct btrfs_file_extent_item));
1466         next_bytenr = le64_to_cpu(file_extent_item.disk_bytenr) +
1467                       le64_to_cpu(file_extent_item.offset);
1468         generation = le64_to_cpu(file_extent_item.generation);
1469         num_bytes = le64_to_cpu(file_extent_item.num_bytes);
1470         generation = le64_to_cpu(file_extent_item.generation);
1471
1472         if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1473                 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu,"
1474                        " offset = %llu, num_bytes = %llu\n",
1475                        file_extent_item.type,
1476                        (unsigned long long)
1477                        le64_to_cpu(file_extent_item.disk_bytenr),
1478                        (unsigned long long)le64_to_cpu(file_extent_item.offset),
1479                        (unsigned long long)num_bytes);
1480         while (num_bytes > 0) {
1481                 u32 chunk_len;
1482                 int num_copies;
1483                 int mirror_num;
1484
1485                 if (num_bytes > state->datablock_size)
1486                         chunk_len = state->datablock_size;
1487                 else
1488                         chunk_len = num_bytes;
1489
1490                 num_copies =
1491                     btrfs_num_copies(&state->root->fs_info->mapping_tree,
1492                                      next_bytenr, state->datablock_size);
1493                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1494                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
1495                                (unsigned long long)next_bytenr, num_copies);
1496                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
1497                         struct btrfsic_block_data_ctx next_block_ctx;
1498                         struct btrfsic_block *next_block;
1499                         int block_was_created;
1500
1501                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1502                                 printk(KERN_INFO "btrfsic_handle_extent_data("
1503                                        "mirror_num=%d)\n", mirror_num);
1504                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1505                                 printk(KERN_INFO
1506                                        "\tdisk_bytenr = %llu, num_bytes %u\n",
1507                                        (unsigned long long)next_bytenr,
1508                                        chunk_len);
1509                         ret = btrfsic_map_block(state, next_bytenr,
1510                                                 chunk_len, &next_block_ctx,
1511                                                 mirror_num);
1512                         if (ret) {
1513                                 printk(KERN_INFO
1514                                        "btrfsic: btrfsic_map_block(@%llu,"
1515                                        " mirror=%d) failed!\n",
1516                                        (unsigned long long)next_bytenr,
1517                                        mirror_num);
1518                                 return -1;
1519                         }
1520
1521                         next_block = btrfsic_block_lookup_or_add(
1522                                         state,
1523                                         &next_block_ctx,
1524                                         "referenced ",
1525                                         0,
1526                                         force_iodone_flag,
1527                                         !force_iodone_flag,
1528                                         mirror_num,
1529                                         &block_was_created);
1530                         if (NULL == next_block) {
1531                                 printk(KERN_INFO
1532                                        "btrfsic: error, kmalloc failed!\n");
1533                                 btrfsic_release_block_ctx(&next_block_ctx);
1534                                 return -1;
1535                         }
1536                         if (!block_was_created) {
1537                                 if (next_block->logical_bytenr != next_bytenr &&
1538                                     !(!next_block->is_metadata &&
1539                                       0 == next_block->logical_bytenr)) {
1540                                         printk(KERN_INFO
1541                                                "Referenced block"
1542                                                " @%llu (%s/%llu/%d)"
1543                                                " found in hash table, D,"
1544                                                " bytenr mismatch"
1545                                                " (!= stored %llu).\n",
1546                                                (unsigned long long)next_bytenr,
1547                                                next_block_ctx.dev->name,
1548                                                (unsigned long long)
1549                                                next_block_ctx.dev_bytenr,
1550                                                mirror_num,
1551                                                (unsigned long long)
1552                                                next_block->logical_bytenr);
1553                                 }
1554                                 next_block->logical_bytenr = next_bytenr;
1555                                 next_block->mirror_num = mirror_num;
1556                         }
1557
1558                         l = btrfsic_block_link_lookup_or_add(state,
1559                                                              &next_block_ctx,
1560                                                              next_block, block,
1561                                                              generation);
1562                         btrfsic_release_block_ctx(&next_block_ctx);
1563                         if (NULL == l)
1564                                 return -1;
1565                 }
1566
1567                 next_bytenr += chunk_len;
1568                 num_bytes -= chunk_len;
1569         }
1570
1571         return 0;
1572 }
1573
1574 static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
1575                              struct btrfsic_block_data_ctx *block_ctx_out,
1576                              int mirror_num)
1577 {
1578         int ret;
1579         u64 length;
1580         struct btrfs_bio *multi = NULL;
1581         struct btrfs_device *device;
1582
1583         length = len;
1584         ret = btrfs_map_block(&state->root->fs_info->mapping_tree, READ,
1585                               bytenr, &length, &multi, mirror_num);
1586
1587         device = multi->stripes[0].dev;
1588         block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev);
1589         block_ctx_out->dev_bytenr = multi->stripes[0].physical;
1590         block_ctx_out->start = bytenr;
1591         block_ctx_out->len = len;
1592         block_ctx_out->datav = NULL;
1593         block_ctx_out->pagev = NULL;
1594         block_ctx_out->mem_to_free = NULL;
1595
1596         if (0 == ret)
1597                 kfree(multi);
1598         if (NULL == block_ctx_out->dev) {
1599                 ret = -ENXIO;
1600                 printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n");
1601         }
1602
1603         return ret;
1604 }
1605
1606 static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
1607                                   u32 len, struct block_device *bdev,
1608                                   struct btrfsic_block_data_ctx *block_ctx_out)
1609 {
1610         block_ctx_out->dev = btrfsic_dev_state_lookup(bdev);
1611         block_ctx_out->dev_bytenr = bytenr;
1612         block_ctx_out->start = bytenr;
1613         block_ctx_out->len = len;
1614         block_ctx_out->datav = NULL;
1615         block_ctx_out->pagev = NULL;
1616         block_ctx_out->mem_to_free = NULL;
1617         if (NULL != block_ctx_out->dev) {
1618                 return 0;
1619         } else {
1620                 printk(KERN_INFO "btrfsic: error, cannot lookup dev (#2)!\n");
1621                 return -ENXIO;
1622         }
1623 }
1624
1625 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx)
1626 {
1627         if (block_ctx->mem_to_free) {
1628                 unsigned int num_pages;
1629
1630                 BUG_ON(!block_ctx->datav);
1631                 BUG_ON(!block_ctx->pagev);
1632                 num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
1633                             PAGE_CACHE_SHIFT;
1634                 while (num_pages > 0) {
1635                         num_pages--;
1636                         if (block_ctx->datav[num_pages]) {
1637                                 kunmap(block_ctx->pagev[num_pages]);
1638                                 block_ctx->datav[num_pages] = NULL;
1639                         }
1640                         if (block_ctx->pagev[num_pages]) {
1641                                 __free_page(block_ctx->pagev[num_pages]);
1642                                 block_ctx->pagev[num_pages] = NULL;
1643                         }
1644                 }
1645
1646                 kfree(block_ctx->mem_to_free);
1647                 block_ctx->mem_to_free = NULL;
1648                 block_ctx->pagev = NULL;
1649                 block_ctx->datav = NULL;
1650         }
1651 }
1652
1653 static int btrfsic_read_block(struct btrfsic_state *state,
1654                               struct btrfsic_block_data_ctx *block_ctx)
1655 {
1656         unsigned int num_pages;
1657         unsigned int i;
1658         u64 dev_bytenr;
1659         int ret;
1660
1661         BUG_ON(block_ctx->datav);
1662         BUG_ON(block_ctx->pagev);
1663         BUG_ON(block_ctx->mem_to_free);
1664         if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) {
1665                 printk(KERN_INFO
1666                        "btrfsic: read_block() with unaligned bytenr %llu\n",
1667                        (unsigned long long)block_ctx->dev_bytenr);
1668                 return -1;
1669         }
1670
1671         num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
1672                     PAGE_CACHE_SHIFT;
1673         block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) +
1674                                           sizeof(*block_ctx->pagev)) *
1675                                          num_pages, GFP_NOFS);
1676         if (!block_ctx->mem_to_free)
1677                 return -1;
1678         block_ctx->datav = block_ctx->mem_to_free;
1679         block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages);
1680         for (i = 0; i < num_pages; i++) {
1681                 block_ctx->pagev[i] = alloc_page(GFP_NOFS);
1682                 if (!block_ctx->pagev[i])
1683                         return -1;
1684         }
1685
1686         dev_bytenr = block_ctx->dev_bytenr;
1687         for (i = 0; i < num_pages;) {
1688                 struct bio *bio;
1689                 unsigned int j;
1690                 DECLARE_COMPLETION_ONSTACK(complete);
1691
1692                 bio = bio_alloc(GFP_NOFS, num_pages - i);
1693                 if (!bio) {
1694                         printk(KERN_INFO
1695                                "btrfsic: bio_alloc() for %u pages failed!\n",
1696                                num_pages - i);
1697                         return -1;
1698                 }
1699                 bio->bi_bdev = block_ctx->dev->bdev;
1700                 bio->bi_sector = dev_bytenr >> 9;
1701                 bio->bi_end_io = btrfsic_complete_bio_end_io;
1702                 bio->bi_private = &complete;
1703
1704                 for (j = i; j < num_pages; j++) {
1705                         ret = bio_add_page(bio, block_ctx->pagev[j],
1706                                            PAGE_CACHE_SIZE, 0);
1707                         if (PAGE_CACHE_SIZE != ret)
1708                                 break;
1709                 }
1710                 if (j == i) {
1711                         printk(KERN_INFO
1712                                "btrfsic: error, failed to add a single page!\n");
1713                         return -1;
1714                 }
1715                 submit_bio(READ, bio);
1716
1717                 /* this will also unplug the queue */
1718                 wait_for_completion(&complete);
1719
1720                 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
1721                         printk(KERN_INFO
1722                                "btrfsic: read error at logical %llu dev %s!\n",
1723                                block_ctx->start, block_ctx->dev->name);
1724                         bio_put(bio);
1725                         return -1;
1726                 }
1727                 bio_put(bio);
1728                 dev_bytenr += (j - i) * PAGE_CACHE_SIZE;
1729                 i = j;
1730         }
1731         for (i = 0; i < num_pages; i++) {
1732                 block_ctx->datav[i] = kmap(block_ctx->pagev[i]);
1733                 if (!block_ctx->datav[i]) {
1734                         printk(KERN_INFO "btrfsic: kmap() failed (dev %s)!\n",
1735                                block_ctx->dev->name);
1736                         return -1;
1737                 }
1738         }
1739
1740         return block_ctx->len;
1741 }
1742
1743 static void btrfsic_complete_bio_end_io(struct bio *bio, int err)
1744 {
1745         complete((struct completion *)bio->bi_private);
1746 }
1747
1748 static void btrfsic_dump_database(struct btrfsic_state *state)
1749 {
1750         struct list_head *elem_all;
1751
1752         BUG_ON(NULL == state);
1753
1754         printk(KERN_INFO "all_blocks_list:\n");
1755         list_for_each(elem_all, &state->all_blocks_list) {
1756                 const struct btrfsic_block *const b_all =
1757                     list_entry(elem_all, struct btrfsic_block,
1758                                all_blocks_node);
1759                 struct list_head *elem_ref_to;
1760                 struct list_head *elem_ref_from;
1761
1762                 printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n",
1763                        btrfsic_get_block_type(state, b_all),
1764                        (unsigned long long)b_all->logical_bytenr,
1765                        b_all->dev_state->name,
1766                        (unsigned long long)b_all->dev_bytenr,
1767                        b_all->mirror_num);
1768
1769                 list_for_each(elem_ref_to, &b_all->ref_to_list) {
1770                         const struct btrfsic_block_link *const l =
1771                             list_entry(elem_ref_to,
1772                                        struct btrfsic_block_link,
1773                                        node_ref_to);
1774
1775                         printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
1776                                " refers %u* to"
1777                                " %c @%llu (%s/%llu/%d)\n",
1778                                btrfsic_get_block_type(state, b_all),
1779                                (unsigned long long)b_all->logical_bytenr,
1780                                b_all->dev_state->name,
1781                                (unsigned long long)b_all->dev_bytenr,
1782                                b_all->mirror_num,
1783                                l->ref_cnt,
1784                                btrfsic_get_block_type(state, l->block_ref_to),
1785                                (unsigned long long)
1786                                l->block_ref_to->logical_bytenr,
1787                                l->block_ref_to->dev_state->name,
1788                                (unsigned long long)l->block_ref_to->dev_bytenr,
1789                                l->block_ref_to->mirror_num);
1790                 }
1791
1792                 list_for_each(elem_ref_from, &b_all->ref_from_list) {
1793                         const struct btrfsic_block_link *const l =
1794                             list_entry(elem_ref_from,
1795                                        struct btrfsic_block_link,
1796                                        node_ref_from);
1797
1798                         printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
1799                                " is ref %u* from"
1800                                " %c @%llu (%s/%llu/%d)\n",
1801                                btrfsic_get_block_type(state, b_all),
1802                                (unsigned long long)b_all->logical_bytenr,
1803                                b_all->dev_state->name,
1804                                (unsigned long long)b_all->dev_bytenr,
1805                                b_all->mirror_num,
1806                                l->ref_cnt,
1807                                btrfsic_get_block_type(state, l->block_ref_from),
1808                                (unsigned long long)
1809                                l->block_ref_from->logical_bytenr,
1810                                l->block_ref_from->dev_state->name,
1811                                (unsigned long long)
1812                                l->block_ref_from->dev_bytenr,
1813                                l->block_ref_from->mirror_num);
1814                 }
1815
1816                 printk(KERN_INFO "\n");
1817         }
1818 }
1819
1820 /*
1821  * Test whether the disk block contains a tree block (leaf or node)
1822  * (note that this test fails for the super block)
1823  */
1824 static int btrfsic_test_for_metadata(struct btrfsic_state *state,
1825                                      char **datav, unsigned int num_pages)
1826 {
1827         struct btrfs_header *h;
1828         u8 csum[BTRFS_CSUM_SIZE];
1829         u32 crc = ~(u32)0;
1830         unsigned int i;
1831
1832         if (num_pages * PAGE_CACHE_SIZE < state->metablock_size)
1833                 return 1; /* not metadata */
1834         num_pages = state->metablock_size >> PAGE_CACHE_SHIFT;
1835         h = (struct btrfs_header *)datav[0];
1836
1837         if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE))
1838                 return 1;
1839
1840         for (i = 0; i < num_pages; i++) {
1841                 u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE);
1842                 size_t sublen = i ? PAGE_CACHE_SIZE :
1843                                     (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE);
1844
1845                 crc = crc32c(crc, data, sublen);
1846         }
1847         btrfs_csum_final(crc, csum);
1848         if (memcmp(csum, h->csum, state->csum_size))
1849                 return 1;
1850
1851         return 0; /* is metadata */
1852 }
1853
1854 static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1855                                           u64 dev_bytenr, char **mapped_datav,
1856                                           unsigned int num_pages,
1857                                           struct bio *bio, int *bio_is_patched,
1858                                           struct buffer_head *bh,
1859                                           int submit_bio_bh_rw)
1860 {
1861         int is_metadata;
1862         struct btrfsic_block *block;
1863         struct btrfsic_block_data_ctx block_ctx;
1864         int ret;
1865         struct btrfsic_state *state = dev_state->state;
1866         struct block_device *bdev = dev_state->bdev;
1867         unsigned int processed_len;
1868
1869         if (NULL != bio_is_patched)
1870                 *bio_is_patched = 0;
1871
1872 again:
1873         if (num_pages == 0)
1874                 return;
1875
1876         processed_len = 0;
1877         is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_datav,
1878                                                       num_pages));
1879
1880         block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr,
1881                                                &state->block_hashtable);
1882         if (NULL != block) {
1883                 u64 bytenr = 0;
1884                 struct list_head *elem_ref_to;
1885                 struct list_head *tmp_ref_to;
1886
1887                 if (block->is_superblock) {
1888                         bytenr = le64_to_cpu(((struct btrfs_super_block *)
1889                                               mapped_datav[0])->bytenr);
1890                         if (num_pages * PAGE_CACHE_SIZE <
1891                             BTRFS_SUPER_INFO_SIZE) {
1892                                 printk(KERN_INFO
1893                                        "btrfsic: cannot work with too short bios!\n");
1894                                 return;
1895                         }
1896                         is_metadata = 1;
1897                         BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_CACHE_SIZE - 1));
1898                         processed_len = BTRFS_SUPER_INFO_SIZE;
1899                         if (state->print_mask &
1900                             BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) {
1901                                 printk(KERN_INFO
1902                                        "[before new superblock is written]:\n");
1903                                 btrfsic_dump_tree_sub(state, block, 0);
1904                         }
1905                 }
1906                 if (is_metadata) {
1907                         if (!block->is_superblock) {
1908                                 if (num_pages * PAGE_CACHE_SIZE <
1909                                     state->metablock_size) {
1910                                         printk(KERN_INFO
1911                                                "btrfsic: cannot work with too short bios!\n");
1912                                         return;
1913                                 }
1914                                 processed_len = state->metablock_size;
1915                                 bytenr = le64_to_cpu(((struct btrfs_header *)
1916                                                       mapped_datav[0])->bytenr);
1917                                 btrfsic_cmp_log_and_dev_bytenr(state, bytenr,
1918                                                                dev_state,
1919                                                                dev_bytenr);
1920                         }
1921                         if (block->logical_bytenr != bytenr) {
1922                                 printk(KERN_INFO
1923                                        "Written block @%llu (%s/%llu/%d)"
1924                                        " found in hash table, %c,"
1925                                        " bytenr mismatch"
1926                                        " (!= stored %llu).\n",
1927                                        (unsigned long long)bytenr,
1928                                        dev_state->name,
1929                                        (unsigned long long)dev_bytenr,
1930                                        block->mirror_num,
1931                                        btrfsic_get_block_type(state, block),
1932                                        (unsigned long long)
1933                                        block->logical_bytenr);
1934                                 block->logical_bytenr = bytenr;
1935                         } else if (state->print_mask &
1936                                    BTRFSIC_PRINT_MASK_VERBOSE)
1937                                 printk(KERN_INFO
1938                                        "Written block @%llu (%s/%llu/%d)"
1939                                        " found in hash table, %c.\n",
1940                                        (unsigned long long)bytenr,
1941                                        dev_state->name,
1942                                        (unsigned long long)dev_bytenr,
1943                                        block->mirror_num,
1944                                        btrfsic_get_block_type(state, block));
1945                 } else {
1946                         if (num_pages * PAGE_CACHE_SIZE <
1947                             state->datablock_size) {
1948                                 printk(KERN_INFO
1949                                        "btrfsic: cannot work with too short bios!\n");
1950                                 return;
1951                         }
1952                         processed_len = state->datablock_size;
1953                         bytenr = block->logical_bytenr;
1954                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1955                                 printk(KERN_INFO
1956                                        "Written block @%llu (%s/%llu/%d)"
1957                                        " found in hash table, %c.\n",
1958                                        (unsigned long long)bytenr,
1959                                        dev_state->name,
1960                                        (unsigned long long)dev_bytenr,
1961                                        block->mirror_num,
1962                                        btrfsic_get_block_type(state, block));
1963                 }
1964
1965                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1966                         printk(KERN_INFO
1967                                "ref_to_list: %cE, ref_from_list: %cE\n",
1968                                list_empty(&block->ref_to_list) ? ' ' : '!',
1969                                list_empty(&block->ref_from_list) ? ' ' : '!');
1970                 if (btrfsic_is_block_ref_by_superblock(state, block, 0)) {
1971                         printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
1972                                " @%llu (%s/%llu/%d), old(gen=%llu,"
1973                                " objectid=%llu, type=%d, offset=%llu),"
1974                                " new(gen=%llu),"
1975                                " which is referenced by most recent superblock"
1976                                " (superblockgen=%llu)!\n",
1977                                btrfsic_get_block_type(state, block),
1978                                (unsigned long long)bytenr,
1979                                dev_state->name,
1980                                (unsigned long long)dev_bytenr,
1981                                block->mirror_num,
1982                                (unsigned long long)block->generation,
1983                                (unsigned long long)
1984                                le64_to_cpu(block->disk_key.objectid),
1985                                block->disk_key.type,
1986                                (unsigned long long)
1987                                le64_to_cpu(block->disk_key.offset),
1988                                (unsigned long long)
1989                                le64_to_cpu(((struct btrfs_header *)
1990                                             mapped_datav[0])->generation),
1991                                (unsigned long long)
1992                                state->max_superblock_generation);
1993                         btrfsic_dump_tree(state);
1994                 }
1995
1996                 if (!block->is_iodone && !block->never_written) {
1997                         printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
1998                                " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu,"
1999                                " which is not yet iodone!\n",
2000                                btrfsic_get_block_type(state, block),
2001                                (unsigned long long)bytenr,
2002                                dev_state->name,
2003                                (unsigned long long)dev_bytenr,
2004                                block->mirror_num,
2005                                (unsigned long long)block->generation,
2006                                (unsigned long long)
2007                                le64_to_cpu(((struct btrfs_header *)
2008                                             mapped_datav[0])->generation));
2009                         /* it would not be safe to go on */
2010                         btrfsic_dump_tree(state);
2011                         goto continue_loop;
2012                 }
2013
2014                 /*
2015                  * Clear all references of this block. Do not free
2016                  * the block itself even if it is not referenced anymore
2017                  * because it still carries valuable information
2018                  * like whether it was ever written and IO completed.
2019                  */
2020                 list_for_each_safe(elem_ref_to, tmp_ref_to,
2021                                    &block->ref_to_list) {
2022                         struct btrfsic_block_link *const l =
2023                             list_entry(elem_ref_to,
2024                                        struct btrfsic_block_link,
2025                                        node_ref_to);
2026
2027                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2028                                 btrfsic_print_rem_link(state, l);
2029                         l->ref_cnt--;
2030                         if (0 == l->ref_cnt) {
2031                                 list_del(&l->node_ref_to);
2032                                 list_del(&l->node_ref_from);
2033                                 btrfsic_block_link_hashtable_remove(l);
2034                                 btrfsic_block_link_free(l);
2035                         }
2036                 }
2037
2038                 if (block->is_superblock)
2039                         ret = btrfsic_map_superblock(state, bytenr,
2040                                                      processed_len,
2041                                                      bdev, &block_ctx);
2042                 else
2043                         ret = btrfsic_map_block(state, bytenr, processed_len,
2044                                                 &block_ctx, 0);
2045                 if (ret) {
2046                         printk(KERN_INFO
2047                                "btrfsic: btrfsic_map_block(root @%llu)"
2048                                " failed!\n", (unsigned long long)bytenr);
2049                         goto continue_loop;
2050                 }
2051                 block_ctx.datav = mapped_datav;
2052                 /* the following is required in case of writes to mirrors,
2053                  * use the same that was used for the lookup */
2054                 block_ctx.dev = dev_state;
2055                 block_ctx.dev_bytenr = dev_bytenr;
2056
2057                 if (is_metadata || state->include_extent_data) {
2058                         block->never_written = 0;
2059                         block->iodone_w_error = 0;
2060                         if (NULL != bio) {
2061                                 block->is_iodone = 0;
2062                                 BUG_ON(NULL == bio_is_patched);
2063                                 if (!*bio_is_patched) {
2064                                         block->orig_bio_bh_private =
2065                                             bio->bi_private;
2066                                         block->orig_bio_bh_end_io.bio =
2067                                             bio->bi_end_io;
2068                                         block->next_in_same_bio = NULL;
2069                                         bio->bi_private = block;
2070                                         bio->bi_end_io = btrfsic_bio_end_io;
2071                                         *bio_is_patched = 1;
2072                                 } else {
2073                                         struct btrfsic_block *chained_block =
2074                                             (struct btrfsic_block *)
2075                                             bio->bi_private;
2076
2077                                         BUG_ON(NULL == chained_block);
2078                                         block->orig_bio_bh_private =
2079                                             chained_block->orig_bio_bh_private;
2080                                         block->orig_bio_bh_end_io.bio =
2081                                             chained_block->orig_bio_bh_end_io.
2082                                             bio;
2083                                         block->next_in_same_bio = chained_block;
2084                                         bio->bi_private = block;
2085                                 }
2086                         } else if (NULL != bh) {
2087                                 block->is_iodone = 0;
2088                                 block->orig_bio_bh_private = bh->b_private;
2089                                 block->orig_bio_bh_end_io.bh = bh->b_end_io;
2090                                 block->next_in_same_bio = NULL;
2091                                 bh->b_private = block;
2092                                 bh->b_end_io = btrfsic_bh_end_io;
2093                         } else {
2094                                 block->is_iodone = 1;
2095                                 block->orig_bio_bh_private = NULL;
2096                                 block->orig_bio_bh_end_io.bio = NULL;
2097                                 block->next_in_same_bio = NULL;
2098                         }
2099                 }
2100
2101                 block->flush_gen = dev_state->last_flush_gen + 1;
2102                 block->submit_bio_bh_rw = submit_bio_bh_rw;
2103                 if (is_metadata) {
2104                         block->logical_bytenr = bytenr;
2105                         block->is_metadata = 1;
2106                         if (block->is_superblock) {
2107                                 BUG_ON(PAGE_CACHE_SIZE !=
2108                                        BTRFS_SUPER_INFO_SIZE);
2109                                 ret = btrfsic_process_written_superblock(
2110                                                 state,
2111                                                 block,
2112                                                 (struct btrfs_super_block *)
2113                                                 mapped_datav[0]);
2114                                 if (state->print_mask &
2115                                     BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) {
2116                                         printk(KERN_INFO
2117                                         "[after new superblock is written]:\n");
2118                                         btrfsic_dump_tree_sub(state, block, 0);
2119                                 }
2120                         } else {
2121                                 block->mirror_num = 0;  /* unknown */
2122                                 ret = btrfsic_process_metablock(
2123                                                 state,
2124                                                 block,
2125                                                 &block_ctx,
2126                                                 0, 0);
2127                         }
2128                         if (ret)
2129                                 printk(KERN_INFO
2130                                        "btrfsic: btrfsic_process_metablock"
2131                                        "(root @%llu) failed!\n",
2132                                        (unsigned long long)dev_bytenr);
2133                 } else {
2134                         block->is_metadata = 0;
2135                         block->mirror_num = 0;  /* unknown */
2136                         block->generation = BTRFSIC_GENERATION_UNKNOWN;
2137                         if (!state->include_extent_data
2138                             && list_empty(&block->ref_from_list)) {
2139                                 /*
2140                                  * disk block is overwritten with extent
2141                                  * data (not meta data) and we are configured
2142                                  * to not include extent data: take the
2143                                  * chance and free the block's memory
2144                                  */
2145                                 btrfsic_block_hashtable_remove(block);
2146                                 list_del(&block->all_blocks_node);
2147                                 btrfsic_block_free(block);
2148                         }
2149                 }
2150                 btrfsic_release_block_ctx(&block_ctx);
2151         } else {
2152                 /* block has not been found in hash table */
2153                 u64 bytenr;
2154
2155                 if (!is_metadata) {
2156                         processed_len = state->datablock_size;
2157                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2158                                 printk(KERN_INFO "Written block (%s/%llu/?)"
2159                                        " !found in hash table, D.\n",
2160                                        dev_state->name,
2161                                        (unsigned long long)dev_bytenr);
2162                         if (!state->include_extent_data) {
2163                                 /* ignore that written D block */
2164                                 goto continue_loop;
2165                         }
2166
2167                         /* this is getting ugly for the
2168                          * include_extent_data case... */
2169                         bytenr = 0;     /* unknown */
2170                         block_ctx.start = bytenr;
2171                         block_ctx.len = processed_len;
2172                         block_ctx.mem_to_free = NULL;
2173                         block_ctx.pagev = NULL;
2174                 } else {
2175                         processed_len = state->metablock_size;
2176                         bytenr = le64_to_cpu(((struct btrfs_header *)
2177                                               mapped_datav[0])->bytenr);
2178                         btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state,
2179                                                        dev_bytenr);
2180                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2181                                 printk(KERN_INFO
2182                                        "Written block @%llu (%s/%llu/?)"
2183                                        " !found in hash table, M.\n",
2184                                        (unsigned long long)bytenr,
2185                                        dev_state->name,
2186                                        (unsigned long long)dev_bytenr);
2187
2188                         ret = btrfsic_map_block(state, bytenr, processed_len,
2189                                                 &block_ctx, 0);
2190                         if (ret) {
2191                                 printk(KERN_INFO
2192                                        "btrfsic: btrfsic_map_block(root @%llu)"
2193                                        " failed!\n",
2194                                        (unsigned long long)dev_bytenr);
2195                                 goto continue_loop;
2196                         }
2197                 }
2198                 block_ctx.datav = mapped_datav;
2199                 /* the following is required in case of writes to mirrors,
2200                  * use the same that was used for the lookup */
2201                 block_ctx.dev = dev_state;
2202                 block_ctx.dev_bytenr = dev_bytenr;
2203
2204                 block = btrfsic_block_alloc();
2205                 if (NULL == block) {
2206                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
2207                         btrfsic_release_block_ctx(&block_ctx);
2208                         goto continue_loop;
2209                 }
2210                 block->dev_state = dev_state;
2211                 block->dev_bytenr = dev_bytenr;
2212                 block->logical_bytenr = bytenr;
2213                 block->is_metadata = is_metadata;
2214                 block->never_written = 0;
2215                 block->iodone_w_error = 0;
2216                 block->mirror_num = 0;  /* unknown */
2217                 block->flush_gen = dev_state->last_flush_gen + 1;
2218                 block->submit_bio_bh_rw = submit_bio_bh_rw;
2219                 if (NULL != bio) {
2220                         block->is_iodone = 0;
2221                         BUG_ON(NULL == bio_is_patched);
2222                         if (!*bio_is_patched) {
2223                                 block->orig_bio_bh_private = bio->bi_private;
2224                                 block->orig_bio_bh_end_io.bio = bio->bi_end_io;
2225                                 block->next_in_same_bio = NULL;
2226                                 bio->bi_private = block;
2227                                 bio->bi_end_io = btrfsic_bio_end_io;
2228                                 *bio_is_patched = 1;
2229                         } else {
2230                                 struct btrfsic_block *chained_block =
2231                                     (struct btrfsic_block *)
2232                                     bio->bi_private;
2233
2234                                 BUG_ON(NULL == chained_block);
2235                                 block->orig_bio_bh_private =
2236                                     chained_block->orig_bio_bh_private;
2237                                 block->orig_bio_bh_end_io.bio =
2238                                     chained_block->orig_bio_bh_end_io.bio;
2239                                 block->next_in_same_bio = chained_block;
2240                                 bio->bi_private = block;
2241                         }
2242                 } else if (NULL != bh) {
2243                         block->is_iodone = 0;
2244                         block->orig_bio_bh_private = bh->b_private;
2245                         block->orig_bio_bh_end_io.bh = bh->b_end_io;
2246                         block->next_in_same_bio = NULL;
2247                         bh->b_private = block;
2248                         bh->b_end_io = btrfsic_bh_end_io;
2249                 } else {
2250                         block->is_iodone = 1;
2251                         block->orig_bio_bh_private = NULL;
2252                         block->orig_bio_bh_end_io.bio = NULL;
2253                         block->next_in_same_bio = NULL;
2254                 }
2255                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2256                         printk(KERN_INFO
2257                                "New written %c-block @%llu (%s/%llu/%d)\n",
2258                                is_metadata ? 'M' : 'D',
2259                                (unsigned long long)block->logical_bytenr,
2260                                block->dev_state->name,
2261                                (unsigned long long)block->dev_bytenr,
2262                                block->mirror_num);
2263                 list_add(&block->all_blocks_node, &state->all_blocks_list);
2264                 btrfsic_block_hashtable_add(block, &state->block_hashtable);
2265
2266                 if (is_metadata) {
2267                         ret = btrfsic_process_metablock(state, block,
2268                                                         &block_ctx, 0, 0);
2269                         if (ret)
2270                                 printk(KERN_INFO
2271                                        "btrfsic: process_metablock(root @%llu)"
2272                                        " failed!\n",
2273                                        (unsigned long long)dev_bytenr);
2274                 }
2275                 btrfsic_release_block_ctx(&block_ctx);
2276         }
2277
2278 continue_loop:
2279         BUG_ON(!processed_len);
2280         dev_bytenr += processed_len;
2281         mapped_datav += processed_len >> PAGE_CACHE_SHIFT;
2282         num_pages -= processed_len >> PAGE_CACHE_SHIFT;
2283         goto again;
2284 }
2285
/*
 * Completion handler that this file installs as bi_end_io on the bios it
 * tracks. bp->bi_private is expected to point to the first entry of a
 * chain of btrfsic_block records linked through next_in_same_bio.
 *
 * The handler puts the saved bi_private/bi_end_io values back into the
 * bio, updates every block of the chain (error flag, flush generation,
 * is_iodone) and finally calls the restored end_io callback with the
 * unchanged error status.
 *
 * @bp:               the bio that has completed
 * @bio_error_status: any non-zero value is recorded as an I/O error
 */
2286 static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status)
2287 {
2288         struct btrfsic_block *block = (struct btrfsic_block *)bp->bi_private;
2289         int iodone_w_error;
2290
2291         /* mutex is not held! This is not safe if IO is not yet completed
2292          * on umount */
2293         iodone_w_error = 0;
2294         if (bio_error_status)
2295                 iodone_w_error = 1;
2296
2297         BUG_ON(NULL == block);
             /* undo the patching: hand the bio back its original private
              * pointer and completion callback (both were saved in the block) */
2298         bp->bi_private = block->orig_bio_bh_private;
2299         bp->bi_end_io = block->orig_bio_bh_end_io.bio;
2300
             /* walk all blocks that were chained to this single bio */
2301         do {
2302                 struct btrfsic_block *next_block;
2303                 struct btrfsic_dev_state *const dev_state = block->dev_state;
2304
2305                 if ((dev_state->state->print_mask &
2306                      BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2307                         printk(KERN_INFO
2308                                "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
2309                                bio_error_status,
2310                                btrfsic_get_block_type(dev_state->state, block),
2311                                (unsigned long long)block->logical_bytenr,
2312                                dev_state->name,
2313                                (unsigned long long)block->dev_bytenr,
2314                                block->mirror_num);
                     /* the successor is read before is_iodone is set at the
                      * end of this iteration.
                      * NOTE(review): presumably because the block may be
                      * picked up by other code once is_iodone is 1 - confirm */
2315                 next_block = block->next_in_same_bio;
2316                 block->iodone_w_error = iodone_w_error;
                     /* a completed FLUSH request starts a new flush generation
                      * on this device */
2317                 if (block->submit_bio_bh_rw & REQ_FLUSH) {
2318                         dev_state->last_flush_gen++;
2319                         if ((dev_state->state->print_mask &
2320                              BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2321                                 printk(KERN_INFO
2322                                        "bio_end_io() new %s flush_gen=%llu\n",
2323                                        dev_state->name,
2324                                        (unsigned long long)
2325                                        dev_state->last_flush_gen);
2326                 }
2327                 if (block->submit_bio_bh_rw & REQ_FUA)
2328                         block->flush_gen = 0; /* FUA completed means block is
2329                                                * on disk */
2330                 block->is_iodone = 1; /* for FLUSH, this releases the block */
2331                 block = next_block;
2332         } while (NULL != block);
2333
             /* forward the completion to the original handler restored above */
2334         bp->bi_end_io(bp, bio_error_status);
2335 }
2336
/*
 * Completion handler that this file installs as b_end_io on the buffer
 * heads it tracks. bh->b_private is expected to point to the
 * btrfsic_block record of the written block.
 *
 * The handler records the I/O result in the block, updates the flush
 * bookkeeping, restores the saved b_private/b_end_io values and then
 * calls the restored callback with the unchanged uptodate value.
 *
 * @bh:       the buffer head that has completed
 * @uptodate: zero is recorded as an I/O error (iodone_w_error = !uptodate)
 */
2337 static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
2338 {
2339         struct btrfsic_block *block = (struct btrfsic_block *)bh->b_private;
2340         int iodone_w_error = !uptodate;
2341         struct btrfsic_dev_state *dev_state;
2342
2343         BUG_ON(NULL == block);
2344         dev_state = block->dev_state;
2345         if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2346                 printk(KERN_INFO
2347                        "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n",
2348                        iodone_w_error,
2349                        btrfsic_get_block_type(dev_state->state, block),
2350                        (unsigned long long)block->logical_bytenr,
2351                        block->dev_state->name,
2352                        (unsigned long long)block->dev_bytenr,
2353                        block->mirror_num);
2354
2355         block->iodone_w_error = iodone_w_error;
             /* a completed FLUSH request starts a new flush generation on
              * this device */
2356         if (block->submit_bio_bh_rw & REQ_FLUSH) {
2357                 dev_state->last_flush_gen++;
2358                 if ((dev_state->state->print_mask &
2359                      BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2360                         printk(KERN_INFO
2361                                "bh_end_io() new %s flush_gen=%llu\n",
2362                                dev_state->name,
2363                                (unsigned long long)dev_state->last_flush_gen);
2364         }
2365         if (block->submit_bio_bh_rw & REQ_FUA)
2366                 block->flush_gen = 0; /* FUA completed means block is on disk */
2367
             /* undo the patching before is_iodone is set and before the
              * original handler is called */
2368         bh->b_private = block->orig_bio_bh_private;
2369         bh->b_end_io = block->orig_bio_bh_end_io.bh;
2370         block->is_iodone = 1; /* for FLUSH, this releases the block */
2371         bh->b_end_io(bh, uptodate);
2372 }
2373
/*
 * Bookkeeping for a super block that is being written.
 *
 * 1. The generation found in super_hdr is stored in the super block's
 *    block record. If it is larger than the largest generation seen so
 *    far (or if no generation has been recorded yet, i.e. the maximum
 *    is 0), the record becomes state->latest_superblock and the maximum
 *    is updated. Otherwise the write is only logged.
 * 2. For the root tree, the chunk tree and the log tree root referenced
 *    by super_hdr, and for every mirror reported by btrfs_num_copies(),
 *    a block record is looked up or created and linked from the super
 *    block.
 * 3. btrfsic_check_all_ref_blocks() is run on the super block; a result
 *    of -1 triggers a warning and a dump of the tree.
 *
 * @state:      the check-integrity state
 * @superblock: block record of the super block that is written
 * @super_hdr:  contents of the super block that is written
 *
 * Returns 0 on success. Returns -1 if mapping a tree root failed, if
 * the block lookup/creation returned NULL, or if the link
 * lookup/creation returned NULL. The outcome of step 3 does not
 * influence the return value.
 */
2374 static int btrfsic_process_written_superblock(
2375                 struct btrfsic_state *state,
2376                 struct btrfsic_block *const superblock,
2377                 struct btrfs_super_block *const super_hdr)
2378 {
2379         int pass;
2380
2381         superblock->generation = btrfs_super_generation(super_hdr);
             /* "old" means: not newer than the maximum seen so far, unless
              * that maximum is still 0 */
2382         if (!(superblock->generation > state->max_superblock_generation ||
2383               0 == state->max_superblock_generation)) {
2384                 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
2385                         printk(KERN_INFO
2386                                "btrfsic: superblock @%llu (%s/%llu/%d)"
2387                                " with old gen %llu <= %llu\n",
2388                                (unsigned long long)superblock->logical_bytenr,
2389                                superblock->dev_state->name,
2390                                (unsigned long long)superblock->dev_bytenr,
2391                                superblock->mirror_num,
2392                                (unsigned long long)
2393                                btrfs_super_generation(super_hdr),
2394                                (unsigned long long)
2395                                state->max_superblock_generation);
2396         } else {
2397                 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
2398                         printk(KERN_INFO
2399                                "btrfsic: got new superblock @%llu (%s/%llu/%d)"
2400                                " with new gen %llu > %llu\n",
2401                                (unsigned long long)superblock->logical_bytenr,
2402                                superblock->dev_state->name,
2403                                (unsigned long long)superblock->dev_bytenr,
2404                                superblock->mirror_num,
2405                                (unsigned long long)
2406                                btrfs_super_generation(super_hdr),
2407                                (unsigned long long)
2408                                state->max_superblock_generation);
2409
                     /* this super block is now the most recent one */
2410                 state->max_superblock_generation =
2411                     btrfs_super_generation(super_hdr);
2412                 state->latest_superblock = superblock;
2413         }
2414
             /* pass 0: root tree, pass 1: chunk tree, pass 2: log tree */
2415         for (pass = 0; pass < 3; pass++) {
2416                 int ret;
2417                 u64 next_bytenr;
2418                 struct btrfsic_block *next_block;
2419                 struct btrfsic_block_data_ctx tmp_next_block_ctx;
2420                 struct btrfsic_block_link *l;
2421                 int num_copies;
2422                 int mirror_num;
2423                 const char *additional_string = NULL;
2424                 struct btrfs_disk_key tmp_disk_key;
2425
                     /* key that is stored in the block record of the tree
                      * root; only the objectid differs between the passes */
2426                 tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
2427                 tmp_disk_key.offset = 0;
2428
2429                 switch (pass) {
2430                 case 0:
2431                         tmp_disk_key.objectid =
2432                             cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID);
2433                         additional_string = "root ";
2434                         next_bytenr = btrfs_super_root(super_hdr);
2435                         if (state->print_mask &
2436                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2437                                 printk(KERN_INFO "root@%llu\n",
2438                                        (unsigned long long)next_bytenr);
2439                         break;
2440                 case 1:
2441                         tmp_disk_key.objectid =
2442                             cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID);
2443                         additional_string = "chunk ";
2444                         next_bytenr = btrfs_super_chunk_root(super_hdr);
2445                         if (state->print_mask &
2446                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2447                                 printk(KERN_INFO "chunk@%llu\n",
2448                                        (unsigned long long)next_bytenr);
2449                         break;
2450                 case 2:
2451                         tmp_disk_key.objectid =
2452                             cpu_to_le64(BTRFS_TREE_LOG_OBJECTID);
2453                         additional_string = "log ";
2454                         next_bytenr = btrfs_super_log_root(super_hdr);
                             /* a log root address of 0 is skipped: nothing
                              * is linked in this pass */
2455                         if (0 == next_bytenr)
2456                                 continue;
2457                         if (state->print_mask &
2458                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2459                                 printk(KERN_INFO "log@%llu\n",
2460                                        (unsigned long long)next_bytenr);
2461                         break;
2462                 }
2463
2464                 num_copies =
2465                     btrfs_num_copies(&state->root->fs_info->mapping_tree,
2466                                      next_bytenr, BTRFS_SUPER_INFO_SIZE);
2467                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
2468                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
2469                                (unsigned long long)next_bytenr, num_copies);
                     /* link the super block to each mirror of the tree root;
                      * mirror numbers start at 1 */
2470                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2471                         int was_created;
2472
2473                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2474                                 printk(KERN_INFO
2475                                        "btrfsic_process_written_superblock("
2476                                        "mirror_num=%d)\n", mirror_num);
2477                         ret = btrfsic_map_block(state, next_bytenr,
2478                                                 BTRFS_SUPER_INFO_SIZE,
2479                                                 &tmp_next_block_ctx,
2480                                                 mirror_num);
2481                         if (ret) {
2482                                 printk(KERN_INFO
2483                                        "btrfsic: btrfsic_map_block(@%llu,"
2484                                        " mirror=%d) failed!\n",
2485                                        (unsigned long long)next_bytenr,
2486                                        mirror_num);
2487                                 return -1;
2488                         }
2489
2490                         next_block = btrfsic_block_lookup_or_add(
2491                                         state,
2492                                         &tmp_next_block_ctx,
2493                                         additional_string,
2494                                         1, 0, 1,
2495                                         mirror_num,
2496                                         &was_created);
2497                         if (NULL == next_block) {
2498                                 printk(KERN_INFO
2499                                        "btrfsic: error, kmalloc failed!\n");
                                     /* the mapping succeeded, so the context
                                      * has to be released on this error path */
2500                                 btrfsic_release_block_ctx(&tmp_next_block_ctx);
2501                                 return -1;
2502                         }
2503
2504                         next_block->disk_key = tmp_disk_key;
                             /* the generation of a freshly created record is
                              * not known here; an existing record keeps its
                              * value */
2505                         if (was_created)
2506                                 next_block->generation =
2507                                     BTRFSIC_GENERATION_UNKNOWN;
2508                         l = btrfsic_block_link_lookup_or_add(
2509                                         state,
2510                                         &tmp_next_block_ctx,
2511                                         next_block,
2512                                         superblock,
2513                                         BTRFSIC_GENERATION_UNKNOWN);
                             /* released before the NULL check, so both the
                              * success and the failure path release it */
2514                         btrfsic_release_block_ctx(&tmp_next_block_ctx);
2515                         if (NULL == l)
2516                                 return -1;
2517                 }
2518         }
2519
             /* check everything that is referenced by the written super
              * block; a failure is reported but 0 is returned nevertheless */
2520         if (-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)) {
2521                 WARN_ON(1);
2522                 btrfsic_dump_tree(state);
2523         }
2524
2525         return 0;
2526 }
2527
2528 static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
2529                                         struct btrfsic_block *const block,
2530                                         int recursion_level)
2531 {
2532         struct list_head *elem_ref_to;
2533         int ret = 0;
2534
2535         if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
2536                 /*
2537                  * Note that this situation can happen and does not
2538                  * indicate an error in regular cases. It happens
2539                  * when disk blocks are freed and later reused.
2540                  * The check-integrity module is not aware of any
2541                  * block free operations, it just recognizes block
2542                  * write operations. Therefore it keeps the linkage
2543                  * information for a block until a block is
2544                  * rewritten. This can temporarily cause incorrect
2545                  * and even circular linkage informations. This
2546                  * causes no harm unless such blocks are referenced
2547                  * by the most recent super block.
2548                  */
2549                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2550                         printk(KERN_INFO
2551                                "btrfsic: abort cyclic linkage (case 1).\n");
2552
2553                 return ret;
2554         }
2555
2556         /*
2557          * This algorithm is recursive because the amount of used stack
2558          * space is very small and the max recursion depth is limited.
2559          */
2560         list_for_each(elem_ref_to, &block->ref_to_list) {
2561                 const struct btrfsic_block_link *const l =
2562                     list_entry(elem_ref_to, struct btrfsic_block_link,
2563                                node_ref_to);
2564
2565                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2566                         printk(KERN_INFO
2567                                "rl=%d, %c @%llu (%s/%llu/%d)"
2568                                " %u* refers to %c @%llu (%s/%llu/%d)\n",
2569                                recursion_level,
2570                                btrfsic_get_block_type(state, block),
2571                                (unsigned long long)block->logical_bytenr,
2572                                block->dev_state->name,
2573                                (unsigned long long)block->dev_bytenr,
2574                                block->mirror_num,
2575                                l->ref_cnt,
2576                                btrfsic_get_block_type(state, l->block_ref_to),
2577                                (unsigned long long)
2578                                l->block_ref_to->logical_bytenr,
2579                                l->block_ref_to->dev_state->name,
2580                                (unsigned long long)l->block_ref_to->dev_bytenr,
2581                                l->block_ref_to->mirror_num);
2582                 if (l->block_ref_to->never_written) {
2583                         printk(KERN_INFO "btrfs: attempt to write superblock"
2584                                " which references block %c @%llu (%s/%llu/%d)"
2585                                " which is never written!\n",
2586                                btrfsic_get_block_type(state, l->block_ref_to),
2587                                (unsigned long long)
2588                                l->block_ref_to->logical_bytenr,
2589                                l->block_ref_to->dev_state->name,
2590                                (unsigned long long)l->block_ref_to->dev_bytenr,
2591                                l->block_ref_to->mirror_num);
2592                         ret = -1;
2593                 } else if (!l->block_ref_to->is_iodone) {
2594                         printk(KERN_INFO "btrfs: attempt to write superblock"
2595                                " which references block %c @%llu (%s/%llu/%d)"
2596                                " which is not yet iodone!\n",
2597                                btrfsic_get_block_type(state, l->block_ref_to),
2598                                (unsigned long long)
2599                                l->block_ref_to->logical_bytenr,
2600                                l->block_ref_to->dev_state->name,
2601                                (unsigned long long)l->block_ref_to->dev_bytenr,
2602                                l->block_ref_to->mirror_num);
2603                         ret = -1;
2604                 } else if (l->parent_generation !=
2605                            l->block_ref_to->generation &&
2606                            BTRFSIC_GENERATION_UNKNOWN !=
2607                            l->parent_generation &&
2608                            BTRFSIC_GENERATION_UNKNOWN !=
2609                            l->block_ref_to->generation) {
2610                         printk(KERN_INFO "btrfs: attempt to write superblock"
2611                                " which references block %c @%llu (%s/%llu/%d)"
2612                                " with generation %llu !="
2613                                " parent generation %llu!\n",
2614                                btrfsic_get_block_type(state, l->block_ref_to),
2615                                (unsigned long long)
2616                                l->block_ref_to->logical_bytenr,
2617                                l->block_ref_to->dev_state->name,
2618                                (unsigned long long)l->block_ref_to->dev_bytenr,
2619                                l->block_ref_to->mirror_num,
2620                                (unsigned long long)l->block_ref_to->generation,
2621                                (unsigned long long)l->parent_generation);
2622                         ret = -1;
2623                 } else if (l->block_ref_to->flush_gen >
2624                            l->block_ref_to->dev_state->last_flush_gen) {
2625                         printk(KERN_INFO "btrfs: attempt to write superblock"
2626                                " which references block %c @%llu (%s/%llu/%d)"
2627                                " which is not flushed out of disk's write cache"
2628                                " (block flush_gen=%llu,"
2629                                " dev->flush_gen=%llu)!\n",
2630                                btrfsic_get_block_type(state, l->block_ref_to),
2631                                (unsigned long long)
2632                                l->block_ref_to->logical_bytenr,
2633                                l->block_ref_to->dev_state->name,
2634                                (unsigned long long)l->block_ref_to->dev_bytenr,
2635                                l->block_ref_to->mirror_num,
2636                                (unsigned long long)block->flush_gen,
2637                                (unsigned long long)
2638                                l->block_ref_to->dev_state->last_flush_gen);
2639                         ret = -1;
2640                 } else if (-1 == btrfsic_check_all_ref_blocks(state,
2641                                                               l->block_ref_to,
2642                                                               recursion_level +
2643                                                               1)) {
2644                         ret = -1;
2645                 }
2646         }
2647
2648         return ret;
2649 }
2650
2651 static int btrfsic_is_block_ref_by_superblock(
2652                 const struct btrfsic_state *state,
2653                 const struct btrfsic_block *block,
2654                 int recursion_level)
2655 {
2656         struct list_head *elem_ref_from;
2657
2658         if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
2659                 /* refer to comment at "abort cyclic linkage (case 1)" */
2660                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2661                         printk(KERN_INFO
2662                                "btrfsic: abort cyclic linkage (case 2).\n");
2663
2664                 return 0;
2665         }
2666
2667         /*
2668          * This algorithm is recursive because the amount of used stack space
2669          * is very small and the max recursion depth is limited.
2670          */
2671         list_for_each(elem_ref_from, &block->ref_from_list) {
2672                 const struct btrfsic_block_link *const l =
2673                     list_entry(elem_ref_from, struct btrfsic_block_link,
2674                                node_ref_from);
2675
2676                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2677                         printk(KERN_INFO
2678                                "rl=%d, %c @%llu (%s/%llu/%d)"
2679                                " is ref %u* from %c @%llu (%s/%llu/%d)\n",
2680                                recursion_level,
2681                                btrfsic_get_block_type(state, block),
2682                                (unsigned long long)block->logical_bytenr,
2683                                block->dev_state->name,
2684                                (unsigned long long)block->dev_bytenr,
2685                                block->mirror_num,
2686                                l->ref_cnt,
2687                                btrfsic_get_block_type(state, l->block_ref_from),
2688                                (unsigned long long)
2689                                l->block_ref_from->logical_bytenr,
2690                                l->block_ref_from->dev_state->name,
2691                                (unsigned long long)
2692                                l->block_ref_from->dev_bytenr,
2693                                l->block_ref_from->mirror_num);
2694                 if (l->block_ref_from->is_superblock &&
2695                     state->latest_superblock->dev_bytenr ==
2696                     l->block_ref_from->dev_bytenr &&
2697                     state->latest_superblock->dev_state->bdev ==
2698                     l->block_ref_from->dev_state->bdev)
2699                         return 1;
2700                 else if (btrfsic_is_block_ref_by_superblock(state,
2701                                                             l->block_ref_from,
2702                                                             recursion_level +
2703                                                             1))
2704                         return 1;
2705         }
2706
2707         return 0;
2708 }
2709
2710 static void btrfsic_print_add_link(const struct btrfsic_state *state,
2711                                    const struct btrfsic_block_link *l)
2712 {
2713         printk(KERN_INFO
2714                "Add %u* link from %c @%llu (%s/%llu/%d)"
2715                " to %c @%llu (%s/%llu/%d).\n",
2716                l->ref_cnt,
2717                btrfsic_get_block_type(state, l->block_ref_from),
2718                (unsigned long long)l->block_ref_from->logical_bytenr,
2719                l->block_ref_from->dev_state->name,
2720                (unsigned long long)l->block_ref_from->dev_bytenr,
2721                l->block_ref_from->mirror_num,
2722                btrfsic_get_block_type(state, l->block_ref_to),
2723                (unsigned long long)l->block_ref_to->logical_bytenr,
2724                l->block_ref_to->dev_state->name,
2725                (unsigned long long)l->block_ref_to->dev_bytenr,
2726                l->block_ref_to->mirror_num);
2727 }
2728
2729 static void btrfsic_print_rem_link(const struct btrfsic_state *state,
2730                                    const struct btrfsic_block_link *l)
2731 {
2732         printk(KERN_INFO
2733                "Rem %u* link from %c @%llu (%s/%llu/%d)"
2734                " to %c @%llu (%s/%llu/%d).\n",
2735                l->ref_cnt,
2736                btrfsic_get_block_type(state, l->block_ref_from),
2737                (unsigned long long)l->block_ref_from->logical_bytenr,
2738                l->block_ref_from->dev_state->name,
2739                (unsigned long long)l->block_ref_from->dev_bytenr,
2740                l->block_ref_from->mirror_num,
2741                btrfsic_get_block_type(state, l->block_ref_to),
2742                (unsigned long long)l->block_ref_to->logical_bytenr,
2743                l->block_ref_to->dev_state->name,
2744                (unsigned long long)l->block_ref_to->dev_bytenr,
2745                l->block_ref_to->mirror_num);
2746 }
2747
2748 static char btrfsic_get_block_type(const struct btrfsic_state *state,
2749                                    const struct btrfsic_block *block)
2750 {
2751         if (block->is_superblock &&
2752             state->latest_superblock->dev_bytenr == block->dev_bytenr &&
2753             state->latest_superblock->dev_state->bdev == block->dev_state->bdev)
2754                 return 'S';
2755         else if (block->is_superblock)
2756                 return 's';
2757         else if (block->is_metadata)
2758                 return 'M';
2759         else
2760                 return 'D';
2761 }
2762
2763 static void btrfsic_dump_tree(const struct btrfsic_state *state)
2764 {
2765         btrfsic_dump_tree_sub(state, state->latest_superblock, 0);
2766 }
2767
2768 static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
2769                                   const struct btrfsic_block *block,
2770                                   int indent_level)
2771 {
2772         struct list_head *elem_ref_to;
2773         int indent_add;
2774         static char buf[80];
2775         int cursor_position;
2776
2777         /*
2778          * Should better fill an on-stack buffer with a complete line and
2779          * dump it at once when it is time to print a newline character.
2780          */
2781
2782         /*
2783          * This algorithm is recursive because the amount of used stack space
2784          * is very small and the max recursion depth is limited.
2785          */
2786         indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)",
2787                              btrfsic_get_block_type(state, block),
2788                              (unsigned long long)block->logical_bytenr,
2789                              block->dev_state->name,
2790                              (unsigned long long)block->dev_bytenr,
2791                              block->mirror_num);
2792         if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
2793                 printk("[...]\n");
2794                 return;
2795         }
2796         printk(buf);
2797         indent_level += indent_add;
2798         if (list_empty(&block->ref_to_list)) {
2799                 printk("\n");
2800                 return;
2801         }
2802         if (block->mirror_num > 1 &&
2803             !(state->print_mask & BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS)) {
2804                 printk(" [...]\n");
2805                 return;
2806         }
2807
2808         cursor_position = indent_level;
2809         list_for_each(elem_ref_to, &block->ref_to_list) {
2810                 const struct btrfsic_block_link *const l =
2811                     list_entry(elem_ref_to, struct btrfsic_block_link,
2812                                node_ref_to);
2813
2814                 while (cursor_position < indent_level) {
2815                         printk(" ");
2816                         cursor_position++;
2817                 }
2818                 if (l->ref_cnt > 1)
2819                         indent_add = sprintf(buf, " %d*--> ", l->ref_cnt);
2820                 else
2821                         indent_add = sprintf(buf, " --> ");
2822                 if (indent_level + indent_add >
2823                     BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
2824                         printk("[...]\n");
2825                         cursor_position = 0;
2826                         continue;
2827                 }
2828
2829                 printk(buf);
2830
2831                 btrfsic_dump_tree_sub(state, l->block_ref_to,
2832                                       indent_level + indent_add);
2833                 cursor_position = 0;
2834         }
2835 }
2836
2837 static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
2838                 struct btrfsic_state *state,
2839                 struct btrfsic_block_data_ctx *next_block_ctx,
2840                 struct btrfsic_block *next_block,
2841                 struct btrfsic_block *from_block,
2842                 u64 parent_generation)
2843 {
2844         struct btrfsic_block_link *l;
2845
2846         l = btrfsic_block_link_hashtable_lookup(next_block_ctx->dev->bdev,
2847                                                 next_block_ctx->dev_bytenr,
2848                                                 from_block->dev_state->bdev,
2849                                                 from_block->dev_bytenr,
2850                                                 &state->block_link_hashtable);
2851         if (NULL == l) {
2852                 l = btrfsic_block_link_alloc();
2853                 if (NULL == l) {
2854                         printk(KERN_INFO
2855                                "btrfsic: error, kmalloc" " failed!\n");
2856                         return NULL;
2857                 }
2858
2859                 l->block_ref_to = next_block;
2860                 l->block_ref_from = from_block;
2861                 l->ref_cnt = 1;
2862                 l->parent_generation = parent_generation;
2863
2864                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2865                         btrfsic_print_add_link(state, l);
2866
2867                 list_add(&l->node_ref_to, &from_block->ref_to_list);
2868                 list_add(&l->node_ref_from, &next_block->ref_from_list);
2869
2870                 btrfsic_block_link_hashtable_add(l,
2871                                                  &state->block_link_hashtable);
2872         } else {
2873                 l->ref_cnt++;
2874                 l->parent_generation = parent_generation;
2875                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2876                         btrfsic_print_add_link(state, l);
2877         }
2878
2879         return l;
2880 }
2881
2882 static struct btrfsic_block *btrfsic_block_lookup_or_add(
2883                 struct btrfsic_state *state,
2884                 struct btrfsic_block_data_ctx *block_ctx,
2885                 const char *additional_string,
2886                 int is_metadata,
2887                 int is_iodone,
2888                 int never_written,
2889                 int mirror_num,
2890                 int *was_created)
2891 {
2892         struct btrfsic_block *block;
2893
2894         block = btrfsic_block_hashtable_lookup(block_ctx->dev->bdev,
2895                                                block_ctx->dev_bytenr,
2896                                                &state->block_hashtable);
2897         if (NULL == block) {
2898                 struct btrfsic_dev_state *dev_state;
2899
2900                 block = btrfsic_block_alloc();
2901                 if (NULL == block) {
2902                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
2903                         return NULL;
2904                 }
2905                 dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev);
2906                 if (NULL == dev_state) {
2907                         printk(KERN_INFO
2908                                "btrfsic: error, lookup dev_state failed!\n");
2909                         btrfsic_block_free(block);
2910                         return NULL;
2911                 }
2912                 block->dev_state = dev_state;
2913                 block->dev_bytenr = block_ctx->dev_bytenr;
2914                 block->logical_bytenr = block_ctx->start;
2915                 block->is_metadata = is_metadata;
2916                 block->is_iodone = is_iodone;
2917                 block->never_written = never_written;
2918                 block->mirror_num = mirror_num;
2919                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2920                         printk(KERN_INFO
2921                                "New %s%c-block @%llu (%s/%llu/%d)\n",
2922                                additional_string,
2923                                btrfsic_get_block_type(state, block),
2924                                (unsigned long long)block->logical_bytenr,
2925                                dev_state->name,
2926                                (unsigned long long)block->dev_bytenr,
2927                                mirror_num);
2928                 list_add(&block->all_blocks_node, &state->all_blocks_list);
2929                 btrfsic_block_hashtable_add(block, &state->block_hashtable);
2930                 if (NULL != was_created)
2931                         *was_created = 1;
2932         } else {
2933                 if (NULL != was_created)
2934                         *was_created = 0;
2935         }
2936
2937         return block;
2938 }
2939
2940 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
2941                                            u64 bytenr,
2942                                            struct btrfsic_dev_state *dev_state,
2943                                            u64 dev_bytenr)
2944 {
2945         int num_copies;
2946         int mirror_num;
2947         int ret;
2948         struct btrfsic_block_data_ctx block_ctx;
2949         int match = 0;
2950
2951         num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree,
2952                                       bytenr, state->metablock_size);
2953
2954         for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2955                 ret = btrfsic_map_block(state, bytenr, state->metablock_size,
2956                                         &block_ctx, mirror_num);
2957                 if (ret) {
2958                         printk(KERN_INFO "btrfsic:"
2959                                " btrfsic_map_block(logical @%llu,"
2960                                " mirror %d) failed!\n",
2961                                (unsigned long long)bytenr, mirror_num);
2962                         continue;
2963                 }
2964
2965                 if (dev_state->bdev == block_ctx.dev->bdev &&
2966                     dev_bytenr == block_ctx.dev_bytenr) {
2967                         match++;
2968                         btrfsic_release_block_ctx(&block_ctx);
2969                         break;
2970                 }
2971                 btrfsic_release_block_ctx(&block_ctx);
2972         }
2973
2974         if (!match) {
2975                 printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio,"
2976                        " buffer->log_bytenr=%llu, submit_bio(bdev=%s,"
2977                        " phys_bytenr=%llu)!\n",
2978                        (unsigned long long)bytenr, dev_state->name,
2979                        (unsigned long long)dev_bytenr);
2980                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2981                         ret = btrfsic_map_block(state, bytenr,
2982                                                 state->metablock_size,
2983                                                 &block_ctx, mirror_num);
2984                         if (ret)
2985                                 continue;
2986
2987                         printk(KERN_INFO "Read logical bytenr @%llu maps to"
2988                                " (%s/%llu/%d)\n",
2989                                (unsigned long long)bytenr,
2990                                block_ctx.dev->name,
2991                                (unsigned long long)block_ctx.dev_bytenr,
2992                                mirror_num);
2993                 }
2994                 WARN_ON(1);
2995         }
2996 }
2997
2998 static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
2999                 struct block_device *bdev)
3000 {
3001         struct btrfsic_dev_state *ds;
3002
3003         ds = btrfsic_dev_state_hashtable_lookup(bdev,
3004                                                 &btrfsic_dev_state_hashtable);
3005         return ds;
3006 }
3007
3008 int btrfsic_submit_bh(int rw, struct buffer_head *bh)
3009 {
3010         struct btrfsic_dev_state *dev_state;
3011
3012         if (!btrfsic_is_initialized)
3013                 return submit_bh(rw, bh);
3014
3015         mutex_lock(&btrfsic_mutex);
3016         /* since btrfsic_submit_bh() might also be called before
3017          * btrfsic_mount(), this might return NULL */
3018         dev_state = btrfsic_dev_state_lookup(bh->b_bdev);
3019
3020         /* Only called to write the superblock (incl. FLUSH/FUA) */
3021         if (NULL != dev_state &&
3022             (rw & WRITE) && bh->b_size > 0) {
3023                 u64 dev_bytenr;
3024
3025                 dev_bytenr = 4096 * bh->b_blocknr;
3026                 if (dev_state->state->print_mask &
3027                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
3028                         printk(KERN_INFO
3029                                "submit_bh(rw=0x%x, blocknr=%lu (bytenr %llu),"
3030                                " size=%lu, data=%p, bdev=%p)\n",
3031                                rw, (unsigned long)bh->b_blocknr,
3032                                (unsigned long long)dev_bytenr,
3033                                (unsigned long)bh->b_size, bh->b_data,
3034                                bh->b_bdev);
3035                 btrfsic_process_written_block(dev_state, dev_bytenr,
3036                                               &bh->b_data, 1, NULL,
3037                                               NULL, bh, rw);
3038         } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
3039                 if (dev_state->state->print_mask &
3040                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
3041                         printk(KERN_INFO
3042                                "submit_bh(rw=0x%x FLUSH, bdev=%p)\n",
3043                                rw, bh->b_bdev);
3044                 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
3045                         if ((dev_state->state->print_mask &
3046                              (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
3047                               BTRFSIC_PRINT_MASK_VERBOSE)))
3048                                 printk(KERN_INFO
3049                                        "btrfsic_submit_bh(%s) with FLUSH"
3050                                        " but dummy block already in use"
3051                                        " (ignored)!\n",
3052                                        dev_state->name);
3053                 } else {
3054                         struct btrfsic_block *const block =
3055                                 &dev_state->dummy_block_for_bio_bh_flush;
3056
3057                         block->is_iodone = 0;
3058                         block->never_written = 0;
3059                         block->iodone_w_error = 0;
3060                         block->flush_gen = dev_state->last_flush_gen + 1;
3061                         block->submit_bio_bh_rw = rw;
3062                         block->orig_bio_bh_private = bh->b_private;
3063                         block->orig_bio_bh_end_io.bh = bh->b_end_io;
3064                         block->next_in_same_bio = NULL;
3065                         bh->b_private = block;
3066                         bh->b_end_io = btrfsic_bh_end_io;
3067                 }
3068         }
3069         mutex_unlock(&btrfsic_mutex);
3070         return submit_bh(rw, bh);
3071 }
3072
3073 void btrfsic_submit_bio(int rw, struct bio *bio)
3074 {
3075         struct btrfsic_dev_state *dev_state;
3076
3077         if (!btrfsic_is_initialized) {
3078                 submit_bio(rw, bio);
3079                 return;
3080         }
3081
3082         mutex_lock(&btrfsic_mutex);
3083         /* since btrfsic_submit_bio() is also called before
3084          * btrfsic_mount(), this might return NULL */
3085         dev_state = btrfsic_dev_state_lookup(bio->bi_bdev);
3086         if (NULL != dev_state &&
3087             (rw & WRITE) && NULL != bio->bi_io_vec) {
3088                 unsigned int i;
3089                 u64 dev_bytenr;
3090                 int bio_is_patched;
3091                 char **mapped_datav;
3092
3093                 dev_bytenr = 512 * bio->bi_sector;
3094                 bio_is_patched = 0;
3095                 if (dev_state->state->print_mask &
3096                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
3097                         printk(KERN_INFO
3098                                "submit_bio(rw=0x%x, bi_vcnt=%u,"
3099                                " bi_sector=%lu (bytenr %llu), bi_bdev=%p)\n",
3100                                rw, bio->bi_vcnt, (unsigned long)bio->bi_sector,
3101                                (unsigned long long)dev_bytenr,
3102                                bio->bi_bdev);
3103
3104                 mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt,
3105                                        GFP_NOFS);
3106                 if (!mapped_datav)
3107                         goto leave;
3108                 for (i = 0; i < bio->bi_vcnt; i++) {
3109                         BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE);
3110                         mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page);
3111                         if (!mapped_datav[i]) {
3112                                 while (i > 0) {
3113                                         i--;
3114                                         kunmap(bio->bi_io_vec[i].bv_page);
3115                                 }
3116                                 kfree(mapped_datav);
3117                                 goto leave;
3118                         }
3119                         if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
3120                              BTRFSIC_PRINT_MASK_VERBOSE) ==
3121                             (dev_state->state->print_mask &
3122                              (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
3123                               BTRFSIC_PRINT_MASK_VERBOSE)))
3124                                 printk(KERN_INFO
3125                                        "#%u: page=%p, len=%u, offset=%u\n",
3126                                        i, bio->bi_io_vec[i].bv_page,
3127                                        bio->bi_io_vec[i].bv_len,
3128                                        bio->bi_io_vec[i].bv_offset);
3129                 }
3130                 btrfsic_process_written_block(dev_state, dev_bytenr,
3131                                               mapped_datav, bio->bi_vcnt,
3132                                               bio, &bio_is_patched,
3133                                               NULL, rw);
3134                 while (i > 0) {
3135                         i--;
3136                         kunmap(bio->bi_io_vec[i].bv_page);
3137                 }
3138                 kfree(mapped_datav);
3139         } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
3140                 if (dev_state->state->print_mask &
3141                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
3142                         printk(KERN_INFO
3143                                "submit_bio(rw=0x%x FLUSH, bdev=%p)\n",
3144                                rw, bio->bi_bdev);
3145                 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
3146                         if ((dev_state->state->print_mask &
3147                              (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
3148                               BTRFSIC_PRINT_MASK_VERBOSE)))
3149                                 printk(KERN_INFO
3150                                        "btrfsic_submit_bio(%s) with FLUSH"
3151                                        " but dummy block already in use"
3152                                        " (ignored)!\n",
3153                                        dev_state->name);
3154                 } else {
3155                         struct btrfsic_block *const block =
3156                                 &dev_state->dummy_block_for_bio_bh_flush;
3157
3158                         block->is_iodone = 0;
3159                         block->never_written = 0;
3160                         block->iodone_w_error = 0;
3161                         block->flush_gen = dev_state->last_flush_gen + 1;
3162                         block->submit_bio_bh_rw = rw;
3163                         block->orig_bio_bh_private = bio->bi_private;
3164                         block->orig_bio_bh_end_io.bio = bio->bi_end_io;
3165                         block->next_in_same_bio = NULL;
3166                         bio->bi_private = block;
3167                         bio->bi_end_io = btrfsic_bio_end_io;
3168                 }
3169         }
3170 leave:
3171         mutex_unlock(&btrfsic_mutex);
3172
3173         submit_bio(rw, bio);
3174 }
3175
3176 int btrfsic_mount(struct btrfs_root *root,
3177                   struct btrfs_fs_devices *fs_devices,
3178                   int including_extent_data, u32 print_mask)
3179 {
3180         int ret;
3181         struct btrfsic_state *state;
3182         struct list_head *dev_head = &fs_devices->devices;
3183         struct btrfs_device *device;
3184
3185         if (root->nodesize != root->leafsize) {
3186                 printk(KERN_INFO
3187                        "btrfsic: cannot handle nodesize %d != leafsize %d!\n",
3188                        root->nodesize, root->leafsize);
3189                 return -1;
3190         }
3191         if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) {
3192                 printk(KERN_INFO
3193                        "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3194                        root->nodesize, (unsigned long)PAGE_CACHE_SIZE);
3195                 return -1;
3196         }
3197         if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) {
3198                 printk(KERN_INFO
3199                        "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3200                        root->leafsize, (unsigned long)PAGE_CACHE_SIZE);
3201                 return -1;
3202         }
3203         if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) {
3204                 printk(KERN_INFO
3205                        "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3206                        root->sectorsize, (unsigned long)PAGE_CACHE_SIZE);
3207                 return -1;
3208         }
3209         state = kzalloc(sizeof(*state), GFP_NOFS);
3210         if (NULL == state) {
3211                 printk(KERN_INFO "btrfs check-integrity: kmalloc() failed!\n");
3212                 return -1;
3213         }
3214
3215         if (!btrfsic_is_initialized) {
3216                 mutex_init(&btrfsic_mutex);
3217                 btrfsic_dev_state_hashtable_init(&btrfsic_dev_state_hashtable);
3218                 btrfsic_is_initialized = 1;
3219         }
3220         mutex_lock(&btrfsic_mutex);
3221         state->root = root;
3222         state->print_mask = print_mask;
3223         state->include_extent_data = including_extent_data;
3224         state->csum_size = 0;
3225         state->metablock_size = root->nodesize;
3226         state->datablock_size = root->sectorsize;
3227         INIT_LIST_HEAD(&state->all_blocks_list);
3228         btrfsic_block_hashtable_init(&state->block_hashtable);
3229         btrfsic_block_link_hashtable_init(&state->block_link_hashtable);
3230         state->max_superblock_generation = 0;
3231         state->latest_superblock = NULL;
3232
3233         list_for_each_entry(device, dev_head, dev_list) {
3234                 struct btrfsic_dev_state *ds;
3235                 char *p;
3236
3237                 if (!device->bdev || !device->name)
3238                         continue;
3239
3240                 ds = btrfsic_dev_state_alloc();
3241                 if (NULL == ds) {
3242                         printk(KERN_INFO
3243                                "btrfs check-integrity: kmalloc() failed!\n");
3244                         mutex_unlock(&btrfsic_mutex);
3245                         return -1;
3246                 }
3247                 ds->bdev = device->bdev;
3248                 ds->state = state;
3249                 bdevname(ds->bdev, ds->name);
3250                 ds->name[BDEVNAME_SIZE - 1] = '\0';
3251                 for (p = ds->name; *p != '\0'; p++);
3252                 while (p > ds->name && *p != '/')
3253                         p--;
3254                 if (*p == '/')
3255                         p++;
3256                 strlcpy(ds->name, p, sizeof(ds->name));
3257                 btrfsic_dev_state_hashtable_add(ds,
3258                                                 &btrfsic_dev_state_hashtable);
3259         }
3260
3261         ret = btrfsic_process_superblock(state, fs_devices);
3262         if (0 != ret) {
3263                 mutex_unlock(&btrfsic_mutex);
3264                 btrfsic_unmount(root, fs_devices);
3265                 return ret;
3266         }
3267
3268         if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_DATABASE)
3269                 btrfsic_dump_database(state);
3270         if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_TREE)
3271                 btrfsic_dump_tree(state);
3272
3273         mutex_unlock(&btrfsic_mutex);
3274         return 0;
3275 }
3276
3277 void btrfsic_unmount(struct btrfs_root *root,
3278                      struct btrfs_fs_devices *fs_devices)
3279 {
3280         struct list_head *elem_all;
3281         struct list_head *tmp_all;
3282         struct btrfsic_state *state;
3283         struct list_head *dev_head = &fs_devices->devices;
3284         struct btrfs_device *device;
3285
3286         if (!btrfsic_is_initialized)
3287                 return;
3288
3289         mutex_lock(&btrfsic_mutex);
3290
3291         state = NULL;
3292         list_for_each_entry(device, dev_head, dev_list) {
3293                 struct btrfsic_dev_state *ds;
3294
3295                 if (!device->bdev || !device->name)
3296                         continue;
3297
3298                 ds = btrfsic_dev_state_hashtable_lookup(
3299                                 device->bdev,
3300                                 &btrfsic_dev_state_hashtable);
3301                 if (NULL != ds) {
3302                         state = ds->state;
3303                         btrfsic_dev_state_hashtable_remove(ds);
3304                         btrfsic_dev_state_free(ds);
3305                 }
3306         }
3307
3308         if (NULL == state) {
3309                 printk(KERN_INFO
3310                        "btrfsic: error, cannot find state information"
3311                        " on umount!\n");
3312                 mutex_unlock(&btrfsic_mutex);
3313                 return;
3314         }
3315
3316         /*
3317          * Don't care about keeping the lists' state up to date,
3318          * just free all memory that was allocated dynamically.
3319          * Free the blocks and the block_links.
3320          */
3321         list_for_each_safe(elem_all, tmp_all, &state->all_blocks_list) {
3322                 struct btrfsic_block *const b_all =
3323                     list_entry(elem_all, struct btrfsic_block,
3324                                all_blocks_node);
3325                 struct list_head *elem_ref_to;
3326                 struct list_head *tmp_ref_to;
3327
3328                 list_for_each_safe(elem_ref_to, tmp_ref_to,
3329                                    &b_all->ref_to_list) {
3330                         struct btrfsic_block_link *const l =
3331                             list_entry(elem_ref_to,
3332                                        struct btrfsic_block_link,
3333                                        node_ref_to);
3334
3335                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
3336                                 btrfsic_print_rem_link(state, l);
3337
3338                         l->ref_cnt--;
3339                         if (0 == l->ref_cnt)
3340                                 btrfsic_block_link_free(l);
3341                 }
3342
3343                 if (b_all->is_iodone || b_all->never_written)
3344                         btrfsic_block_free(b_all);
3345                 else
3346                         printk(KERN_INFO "btrfs: attempt to free %c-block"
3347                                " @%llu (%s/%llu/%d) on umount which is"
3348                                " not yet iodone!\n",
3349                                btrfsic_get_block_type(state, b_all),
3350                                (unsigned long long)b_all->logical_bytenr,
3351                                b_all->dev_state->name,
3352                                (unsigned long long)b_all->dev_bytenr,
3353                                b_all->mirror_num);
3354         }
3355
3356         mutex_unlock(&btrfsic_mutex);
3357
3358         kfree(state);
3359 }