2 * Copyright (C) 2012 Red Hat, Inc.
4 * Author: Mikulas Patocka <mpatocka@redhat.com>
6 * Based on Chromium dm-verity driver (C) 2011 The Chromium OS Authors
8 * This file is released under the GPLv2.
10 * In the file "/sys/module/dm_verity/parameters/prefetch_cluster" you can set
11 * the default prefetch value. Data are read in "prefetch_cluster" chunks from the
12 * hash device. Setting this greatly improves performance when data and hash
13 * are on the same disk on different partitions on devices with poor random
19 #include <linux/module.h>
20 #include <linux/device-mapper.h>
21 #include <crypto/hash.h>
23 #define DM_MSG_PREFIX "verity"
25 #define DM_VERITY_IO_VEC_INLINE 16
26 #define DM_VERITY_MEMPOOL_SIZE 4
27 #define DM_VERITY_DEFAULT_PREFETCH_SIZE 262144
29 #define DM_VERITY_MAX_LEVELS 63
31 static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE;
33 module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, S_IRUGO | S_IWUSR);
36 struct dm_dev *data_dev;
37 struct dm_dev *hash_dev;
39 struct dm_bufio_client *bufio;
41 struct crypto_shash *tfm;
42 u8 *root_digest; /* digest of the root block */
43 u8 *salt; /* salt: its size is salt_size */
45 sector_t data_start; /* data offset in 512-byte sectors */
46 sector_t hash_start; /* hash start in blocks */
47 sector_t data_blocks; /* the number of data blocks */
48 sector_t hash_blocks; /* the number of hash blocks */
49 unsigned char data_dev_block_bits; /* log2(data blocksize) */
50 unsigned char hash_dev_block_bits; /* log2(hash blocksize) */
51 unsigned char hash_per_block_bits; /* log2(hashes in hash block) */
52 unsigned char levels; /* the number of tree levels */
53 unsigned char version;
54 unsigned digest_size; /* digest size for the current hash algorithm */
55 unsigned shash_descsize;/* the size of temporary space for crypto */
56 int hash_failed; /* set to 1 if hash of any block failed */
58 mempool_t *vec_mempool; /* mempool of bio vector */
60 struct workqueue_struct *verify_wq;
62 /* starting blocks for each tree level. 0 is the lowest level. */
63 sector_t hash_level_block[DM_VERITY_MAX_LEVELS];
69 /* original values of bio->bi_end_io and bio->bi_private */
70 bio_end_io_t *orig_bi_end_io;
71 void *orig_bi_private;
76 /* saved bio vector */
77 struct bio_vec *io_vec;
80 struct work_struct work;
82 /* A space for short vectors; longer vectors are allocated separately. */
83 struct bio_vec io_vec_inline[DM_VERITY_IO_VEC_INLINE];
86 * Three variably-sized fields follow this struct:
88 * u8 hash_desc[v->shash_descsize];
89 * u8 real_digest[v->digest_size];
90 * u8 want_digest[v->digest_size];
92 * To access them use: io_hash_desc(), io_real_digest() and io_want_digest().
96 struct dm_verity_prefetch_work {
97 struct work_struct work;
103 static struct shash_desc *io_hash_desc(struct dm_verity *v, struct dm_verity_io *io)
105 return (struct shash_desc *)(io + 1);
108 static u8 *io_real_digest(struct dm_verity *v, struct dm_verity_io *io)
110 return (u8 *)(io + 1) + v->shash_descsize;
113 static u8 *io_want_digest(struct dm_verity *v, struct dm_verity_io *io)
115 return (u8 *)(io + 1) + v->shash_descsize + v->digest_size;
119 * Auxiliary structure appended to each dm-bufio buffer. If the value
120 * hash_verified is nonzero, hash of the block has been verified.
122 * The variable hash_verified is set to 0 when allocating the buffer, then
123 * it can be changed to 1 and it is never reset to 0 again.
125 * There is no lock around this value. At worst, a race condition causes
126 * multiple processes to verify the hash of the same buffer simultaneously
127 * and to write 1 to hash_verified simultaneously.
128 * This condition is harmless, so we don't need locking.
135 * Initialize struct buffer_aux for a freshly created buffer.
137 static void dm_bufio_alloc_callback(struct dm_buffer *buf)
139 struct buffer_aux *aux = dm_bufio_get_aux_data(buf);
141 aux->hash_verified = 0;
145 * Translate input sector number to the sector number on the target device.
147 static sector_t verity_map_sector(struct dm_verity *v, sector_t bi_sector)
149 return v->data_start + dm_target_offset(v->ti, bi_sector);
153 * Return hash position of a specified block at a specified tree level
154 * (0 is the lowest level).
155 * The lowest "hash_per_block_bits"-bits of the result denote hash position
156 * inside a hash block. The remaining bits denote location of the hash block.
158 static sector_t verity_position_at_level(struct dm_verity *v, sector_t block,
161 return block >> (level * v->hash_per_block_bits);
164 static void verity_hash_at_level(struct dm_verity *v, sector_t block, int level,
165 sector_t *hash_block, unsigned *offset)
167 sector_t position = verity_position_at_level(v, block, level);
170 *hash_block = v->hash_level_block[level] + (position >> v->hash_per_block_bits);
175 idx = position & ((1 << v->hash_per_block_bits) - 1);
177 *offset = idx * v->digest_size;
179 *offset = idx << (v->hash_dev_block_bits - v->hash_per_block_bits);
183 * Verify hash of a metadata block pertaining to the specified data block
184 * ("block" argument) at a specified level ("level" argument).
186 * On successful return, io_want_digest(v, io) contains the hash value for
187 * a lower tree level or for the data block (if we're at the lowest level).
189 * If "skip_unverified" is true, unverified buffer is skipped and 1 is returned.
190 * If "skip_unverified" is false, unverified buffer is hashed and verified
191 * against current value of io_want_digest(v, io).
193 static int verity_verify_level(struct dm_verity_io *io, sector_t block,
194 int level, bool skip_unverified)
196 struct dm_verity *v = io->v;
197 struct dm_buffer *buf;
198 struct buffer_aux *aux;
204 verity_hash_at_level(v, block, level, &hash_block, &offset);
206 data = dm_bufio_read(v->bufio, hash_block, &buf);
207 if (unlikely(IS_ERR(data)))
208 return PTR_ERR(data);
210 aux = dm_bufio_get_aux_data(buf);
212 if (!aux->hash_verified) {
213 struct shash_desc *desc;
216 if (skip_unverified) {
221 desc = io_hash_desc(v, io);
223 desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
224 r = crypto_shash_init(desc);
226 DMERR("crypto_shash_init failed: %d", r);
230 if (likely(v->version >= 1)) {
231 r = crypto_shash_update(desc, v->salt, v->salt_size);
233 DMERR("crypto_shash_update failed: %d", r);
238 r = crypto_shash_update(desc, data, 1 << v->hash_dev_block_bits);
240 DMERR("crypto_shash_update failed: %d", r);
245 r = crypto_shash_update(desc, v->salt, v->salt_size);
247 DMERR("crypto_shash_update failed: %d", r);
252 result = io_real_digest(v, io);
253 r = crypto_shash_final(desc, result);
255 DMERR("crypto_shash_final failed: %d", r);
258 if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) {
259 DMERR_LIMIT("metadata block %llu is corrupted",
260 (unsigned long long)hash_block);
265 aux->hash_verified = 1;
270 memcpy(io_want_digest(v, io), data, v->digest_size);
272 dm_bufio_release(buf);
276 dm_bufio_release(buf);
282 * Verify one "dm_verity_io" structure.
284 static int verity_verify_io(struct dm_verity_io *io)
286 struct dm_verity *v = io->v;
289 unsigned vector = 0, offset = 0;
291 for (b = 0; b < io->n_blocks; b++) {
292 struct shash_desc *desc;
297 if (likely(v->levels)) {
299 * First, we try to get the requested hash for
300 * the current block. If the hash block itself is
301 * verified, zero is returned. If it isn't, this
302 * function returns 1 and we fall back to whole
303 * chain verification.
305 int r = verity_verify_level(io, io->block + b, 0, true);
307 goto test_block_hash;
312 memcpy(io_want_digest(v, io), v->root_digest, v->digest_size);
314 for (i = v->levels - 1; i >= 0; i--) {
315 int r = verity_verify_level(io, io->block + b, i, false);
321 desc = io_hash_desc(v, io);
323 desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
324 r = crypto_shash_init(desc);
326 DMERR("crypto_shash_init failed: %d", r);
330 if (likely(v->version >= 1)) {
331 r = crypto_shash_update(desc, v->salt, v->salt_size);
333 DMERR("crypto_shash_update failed: %d", r);
338 todo = 1 << v->data_dev_block_bits;
344 BUG_ON(vector >= io->io_vec_size);
345 bv = &io->io_vec[vector];
346 page = kmap_atomic(bv->bv_page);
347 len = bv->bv_len - offset;
348 if (likely(len >= todo))
350 r = crypto_shash_update(desc,
351 page + bv->bv_offset + offset, len);
354 DMERR("crypto_shash_update failed: %d", r);
358 if (likely(offset == bv->bv_len)) {
366 r = crypto_shash_update(desc, v->salt, v->salt_size);
368 DMERR("crypto_shash_update failed: %d", r);
373 result = io_real_digest(v, io);
374 r = crypto_shash_final(desc, result);
376 DMERR("crypto_shash_final failed: %d", r);
379 if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) {
380 DMERR_LIMIT("data block %llu is corrupted",
381 (unsigned long long)(io->block + b));
386 BUG_ON(vector != io->io_vec_size);
393 * End one "io" structure with a given error.
395 static void verity_finish_io(struct dm_verity_io *io, int error)
397 struct dm_verity *v = io->v;
398 struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_bio_data_size);
400 bio->bi_end_io = io->orig_bi_end_io;
401 bio->bi_private = io->orig_bi_private;
403 if (io->io_vec != io->io_vec_inline)
404 mempool_free(io->io_vec, v->vec_mempool);
406 bio_endio(bio, error);
409 static void verity_work(struct work_struct *w)
411 struct dm_verity_io *io = container_of(w, struct dm_verity_io, work);
413 verity_finish_io(io, verity_verify_io(io));
416 static void verity_end_io(struct bio *bio, int error)
418 struct dm_verity_io *io = bio->bi_private;
421 verity_finish_io(io, error);
425 INIT_WORK(&io->work, verity_work);
426 queue_work(io->v->verify_wq, &io->work);
430 * Prefetch buffers for the specified io.
431 * The root buffer is not prefetched; it is assumed that it will be cached
434 static void verity_prefetch_io(struct work_struct *work)
436 struct dm_verity_prefetch_work *pw =
437 container_of(work, struct dm_verity_prefetch_work, work);
438 struct dm_verity *v = pw->v;
441 for (i = v->levels - 2; i >= 0; i--) {
442 sector_t hash_block_start;
443 sector_t hash_block_end;
444 verity_hash_at_level(v, pw->block, i, &hash_block_start, NULL);
445 verity_hash_at_level(v, pw->block + pw->n_blocks - 1, i, &hash_block_end, NULL);
447 unsigned cluster = ACCESS_ONCE(dm_verity_prefetch_cluster);
449 cluster >>= v->data_dev_block_bits;
450 if (unlikely(!cluster))
451 goto no_prefetch_cluster;
453 if (unlikely(cluster & (cluster - 1)))
454 cluster = 1 << (fls(cluster) - 1);
456 hash_block_start &= ~(sector_t)(cluster - 1);
457 hash_block_end |= cluster - 1;
458 if (unlikely(hash_block_end >= v->hash_blocks))
459 hash_block_end = v->hash_blocks - 1;
462 dm_bufio_prefetch(v->bufio, hash_block_start,
463 hash_block_end - hash_block_start + 1);
469 static void verity_submit_prefetch(struct dm_verity *v, struct dm_verity_io *io)
471 struct dm_verity_prefetch_work *pw;
473 pw = kmalloc(sizeof(struct dm_verity_prefetch_work),
474 GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
479 INIT_WORK(&pw->work, verity_prefetch_io);
481 pw->block = io->block;
482 pw->n_blocks = io->n_blocks;
483 queue_work(v->verify_wq, &pw->work);
487 * Bio map function. It allocates dm_verity_io structure and bio vector and
488 * fills them. Then it issues prefetches and the I/O.
490 static int verity_map(struct dm_target *ti, struct bio *bio)
492 struct dm_verity *v = ti->private;
493 struct dm_verity_io *io;
495 bio->bi_bdev = v->data_dev->bdev;
496 bio->bi_sector = verity_map_sector(v, bio->bi_sector);
498 if (((unsigned)bio->bi_sector | bio_sectors(bio)) &
499 ((1 << (v->data_dev_block_bits - SECTOR_SHIFT)) - 1)) {
500 DMERR_LIMIT("unaligned io");
504 if ((bio->bi_sector + bio_sectors(bio)) >>
505 (v->data_dev_block_bits - SECTOR_SHIFT) > v->data_blocks) {
506 DMERR_LIMIT("io out of range");
510 if (bio_data_dir(bio) == WRITE)
513 io = dm_per_bio_data(bio, ti->per_bio_data_size);
515 io->orig_bi_end_io = bio->bi_end_io;
516 io->orig_bi_private = bio->bi_private;
517 io->block = bio->bi_sector >> (v->data_dev_block_bits - SECTOR_SHIFT);
518 io->n_blocks = bio->bi_size >> v->data_dev_block_bits;
520 bio->bi_end_io = verity_end_io;
521 bio->bi_private = io;
522 io->io_vec_size = bio->bi_vcnt - bio->bi_idx;
523 if (io->io_vec_size < DM_VERITY_IO_VEC_INLINE)
524 io->io_vec = io->io_vec_inline;
526 io->io_vec = mempool_alloc(v->vec_mempool, GFP_NOIO);
527 memcpy(io->io_vec, bio_iovec(bio),
528 io->io_vec_size * sizeof(struct bio_vec));
530 verity_submit_prefetch(v, io);
532 generic_make_request(bio);
534 return DM_MAPIO_SUBMITTED;
538 * Status: V (valid) or C (corruption found)
540 static void verity_status(struct dm_target *ti, status_type_t type,
541 unsigned status_flags, char *result, unsigned maxlen)
543 struct dm_verity *v = ti->private;
548 case STATUSTYPE_INFO:
549 DMEMIT("%c", v->hash_failed ? 'C' : 'V');
551 case STATUSTYPE_TABLE:
552 DMEMIT("%u %s %s %u %u %llu %llu %s ",
556 1 << v->data_dev_block_bits,
557 1 << v->hash_dev_block_bits,
558 (unsigned long long)v->data_blocks,
559 (unsigned long long)v->hash_start,
562 for (x = 0; x < v->digest_size; x++)
563 DMEMIT("%02x", v->root_digest[x]);
568 for (x = 0; x < v->salt_size; x++)
569 DMEMIT("%02x", v->salt[x]);
574 static int verity_ioctl(struct dm_target *ti, unsigned cmd,
577 struct dm_verity *v = ti->private;
581 ti->len != i_size_read(v->data_dev->bdev->bd_inode) >> SECTOR_SHIFT)
582 r = scsi_verify_blk_ioctl(NULL, cmd);
584 return r ? : __blkdev_driver_ioctl(v->data_dev->bdev, v->data_dev->mode,
588 static int verity_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
589 struct bio_vec *biovec, int max_size)
591 struct dm_verity *v = ti->private;
592 struct request_queue *q = bdev_get_queue(v->data_dev->bdev);
594 if (!q->merge_bvec_fn)
597 bvm->bi_bdev = v->data_dev->bdev;
598 bvm->bi_sector = verity_map_sector(v, bvm->bi_sector);
600 return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
603 static int verity_iterate_devices(struct dm_target *ti,
604 iterate_devices_callout_fn fn, void *data)
606 struct dm_verity *v = ti->private;
608 return fn(ti, v->data_dev, v->data_start, ti->len, data);
611 static void verity_io_hints(struct dm_target *ti, struct queue_limits *limits)
613 struct dm_verity *v = ti->private;
615 if (limits->logical_block_size < 1 << v->data_dev_block_bits)
616 limits->logical_block_size = 1 << v->data_dev_block_bits;
618 if (limits->physical_block_size < 1 << v->data_dev_block_bits)
619 limits->physical_block_size = 1 << v->data_dev_block_bits;
621 blk_limits_io_min(limits, limits->logical_block_size);
624 static void verity_dtr(struct dm_target *ti)
626 struct dm_verity *v = ti->private;
629 destroy_workqueue(v->verify_wq);
632 mempool_destroy(v->vec_mempool);
635 dm_bufio_client_destroy(v->bufio);
638 kfree(v->root_digest);
641 crypto_free_shash(v->tfm);
646 dm_put_device(ti, v->hash_dev);
649 dm_put_device(ti, v->data_dev);
656 * <version> The current format is version 1.
657 * Vsn 0 is compatible with original Chromium OS releases.
662 * <the number of data blocks>
666 * <salt> Hex string or "-" if no salt.
668 static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
672 unsigned long long num_ll;
675 sector_t hash_position;
678 v = kzalloc(sizeof(struct dm_verity), GFP_KERNEL);
680 ti->error = "Cannot allocate verity structure";
686 if ((dm_table_get_mode(ti->table) & ~FMODE_READ)) {
687 ti->error = "Device must be readonly";
693 ti->error = "Invalid argument count: exactly 10 arguments required";
698 if (sscanf(argv[0], "%d%c", &num, &dummy) != 1 ||
699 num < 0 || num > 1) {
700 ti->error = "Invalid version";
706 r = dm_get_device(ti, argv[1], FMODE_READ, &v->data_dev);
708 ti->error = "Data device lookup failed";
712 r = dm_get_device(ti, argv[2], FMODE_READ, &v->hash_dev);
714 ti->error = "Data device lookup failed";
718 if (sscanf(argv[3], "%u%c", &num, &dummy) != 1 ||
719 !num || (num & (num - 1)) ||
720 num < bdev_logical_block_size(v->data_dev->bdev) ||
722 ti->error = "Invalid data device block size";
726 v->data_dev_block_bits = ffs(num) - 1;
728 if (sscanf(argv[4], "%u%c", &num, &dummy) != 1 ||
729 !num || (num & (num - 1)) ||
730 num < bdev_logical_block_size(v->hash_dev->bdev) ||
732 ti->error = "Invalid hash device block size";
736 v->hash_dev_block_bits = ffs(num) - 1;
738 if (sscanf(argv[5], "%llu%c", &num_ll, &dummy) != 1 ||
739 (sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT))
740 >> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll) {
741 ti->error = "Invalid data blocks";
745 v->data_blocks = num_ll;
747 if (ti->len > (v->data_blocks << (v->data_dev_block_bits - SECTOR_SHIFT))) {
748 ti->error = "Data device is too small";
753 if (sscanf(argv[6], "%llu%c", &num_ll, &dummy) != 1 ||
754 (sector_t)(num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT))
755 >> (v->hash_dev_block_bits - SECTOR_SHIFT) != num_ll) {
756 ti->error = "Invalid hash start";
760 v->hash_start = num_ll;
762 v->alg_name = kstrdup(argv[7], GFP_KERNEL);
764 ti->error = "Cannot allocate algorithm name";
769 v->tfm = crypto_alloc_shash(v->alg_name, 0, 0);
770 if (IS_ERR(v->tfm)) {
771 ti->error = "Cannot initialize hash function";
776 v->digest_size = crypto_shash_digestsize(v->tfm);
777 if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) {
778 ti->error = "Digest size too big";
783 sizeof(struct shash_desc) + crypto_shash_descsize(v->tfm);
785 v->root_digest = kmalloc(v->digest_size, GFP_KERNEL);
786 if (!v->root_digest) {
787 ti->error = "Cannot allocate root digest";
791 if (strlen(argv[8]) != v->digest_size * 2 ||
792 hex2bin(v->root_digest, argv[8], v->digest_size)) {
793 ti->error = "Invalid root digest";
798 if (strcmp(argv[9], "-")) {
799 v->salt_size = strlen(argv[9]) / 2;
800 v->salt = kmalloc(v->salt_size, GFP_KERNEL);
802 ti->error = "Cannot allocate salt";
806 if (strlen(argv[9]) != v->salt_size * 2 ||
807 hex2bin(v->salt, argv[9], v->salt_size)) {
808 ti->error = "Invalid salt";
814 v->hash_per_block_bits =
815 fls((1 << v->hash_dev_block_bits) / v->digest_size) - 1;
819 while (v->hash_per_block_bits * v->levels < 64 &&
820 (unsigned long long)(v->data_blocks - 1) >>
821 (v->hash_per_block_bits * v->levels))
824 if (v->levels > DM_VERITY_MAX_LEVELS) {
825 ti->error = "Too many tree levels";
830 hash_position = v->hash_start;
831 for (i = v->levels - 1; i >= 0; i--) {
833 v->hash_level_block[i] = hash_position;
834 s = verity_position_at_level(v, v->data_blocks, i);
835 s = (s >> v->hash_per_block_bits) +
836 !!(s & ((1 << v->hash_per_block_bits) - 1));
837 if (hash_position + s < hash_position) {
838 ti->error = "Hash device offset overflow";
844 v->hash_blocks = hash_position;
846 v->bufio = dm_bufio_client_create(v->hash_dev->bdev,
847 1 << v->hash_dev_block_bits, 1, sizeof(struct buffer_aux),
848 dm_bufio_alloc_callback, NULL);
849 if (IS_ERR(v->bufio)) {
850 ti->error = "Cannot initialize dm-bufio";
851 r = PTR_ERR(v->bufio);
856 if (dm_bufio_get_device_size(v->bufio) < v->hash_blocks) {
857 ti->error = "Hash device is too small";
862 ti->per_bio_data_size = roundup(sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2, __alignof__(struct dm_verity_io));
864 v->vec_mempool = mempool_create_kmalloc_pool(DM_VERITY_MEMPOOL_SIZE,
865 BIO_MAX_PAGES * sizeof(struct bio_vec));
866 if (!v->vec_mempool) {
867 ti->error = "Cannot allocate vector mempool";
872 /* WQ_UNBOUND greatly improves performance when running on ramdisk */
873 v->verify_wq = alloc_workqueue("kverityd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus());
875 ti->error = "Cannot allocate workqueue";
888 static struct target_type verity_target = {
890 .version = {1, 2, 0},
891 .module = THIS_MODULE,
895 .status = verity_status,
896 .ioctl = verity_ioctl,
897 .merge = verity_merge,
898 .iterate_devices = verity_iterate_devices,
899 .io_hints = verity_io_hints,
902 static int __init dm_verity_init(void)
906 r = dm_register_target(&verity_target);
908 DMERR("register failed %d", r);
913 static void __exit dm_verity_exit(void)
915 dm_unregister_target(&verity_target);
918 module_init(dm_verity_init);
919 module_exit(dm_verity_exit);
921 MODULE_AUTHOR("Mikulas Patocka <mpatocka@redhat.com>");
922 MODULE_AUTHOR("Mandeep Baines <msb@chromium.org>");
923 MODULE_AUTHOR("Will Drewry <wad@chromium.org>");
924 MODULE_DESCRIPTION(DM_NAME " target for transparent disk integrity checking");
925 MODULE_LICENSE("GPL");