// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2015 Google, Inc.
 *
 * Author: Sami Tolvanen <samitolvanen@google.com>
 */
8 #include "dm-verity-fec.h"
9 #include <linux/math64.h>
11 #define DM_MSG_PREFIX "verity-fec"
14 * If error correction has been configured, returns true.
16 bool verity_fec_is_enabled(struct dm_verity *v)
18 return v->fec && v->fec->dev;
22 * Return a pointer to dm_verity_fec_io after dm_verity_io and its variable
25 static inline struct dm_verity_fec_io *fec_io(struct dm_verity_io *io)
27 return (struct dm_verity_fec_io *) verity_io_digest_end(io->v, io);
31 * Return an interleaved offset for a byte in RS block.
33 static inline u64 fec_interleave(struct dm_verity *v, u64 offset)
37 mod = do_div(offset, v->fec->rsn);
38 return offset + mod * (v->fec->rounds << v->data_dev_block_bits);
42 * Decode an RS block using Reed-Solomon.
44 static int fec_decode_rs8(struct dm_verity *v, struct dm_verity_fec_io *fio,
45 u8 *data, u8 *fec, int neras)
48 uint16_t par[DM_VERITY_FEC_RSM - DM_VERITY_FEC_MIN_RSN];
50 for (i = 0; i < v->fec->roots; i++)
53 return decode_rs8(fio->rs, data, par, v->fec->rsn, NULL, neras,
54 fio->erasures, 0, NULL);
58 * Read error-correcting codes for the requested RS block. Returns a pointer
59 * to the data block. Caller is responsible for releasing buf.
61 static u8 *fec_read_parity(struct dm_verity *v, u64 rsb, int index,
62 unsigned *offset, struct dm_buffer **buf)
64 u64 position, block, rem;
67 position = (index + rsb) * v->fec->roots;
68 block = div64_u64_rem(position, v->fec->io_size, &rem);
69 *offset = (unsigned)rem;
71 res = dm_bufio_read(v->fec->bufio, block, buf);
73 DMERR("%s: FEC %llu: parity read failed (block %llu): %ld",
74 v->data_dev->name, (unsigned long long)rsb,
75 (unsigned long long)block, PTR_ERR(res));
/* Loop over each preallocated buffer slot. */
#define fec_for_each_prealloc_buffer(__i) \
	for (__i = 0; __i < DM_VERITY_FEC_BUF_PREALLOC; __i++)

/* Loop over each extra buffer slot; the io argument is accepted but unused. */
#define fec_for_each_extra_buffer(io, __i) \
	for (__i = DM_VERITY_FEC_BUF_PREALLOC; __i < DM_VERITY_FEC_BUF_MAX; __i++)

/* Loop over each allocated buffer, i.e. the first (io)->nbufs slots. */
#define fec_for_each_buffer(io, __i) \
	for (__i = 0; __i < (io)->nbufs; __i++)

/*
 * Loop over each RS block in each allocated buffer: __i walks the buffers and
 * __j walks the (1 << DM_VERITY_FEC_BUF_RS_BITS) RS blocks of each buffer.
 */
#define fec_for_each_buffer_rs_block(io, __i, __j) \
	fec_for_each_buffer(io, __i) \
		for (__j = 0; __j < (1 << DM_VERITY_FEC_BUF_RS_BITS); __j++)
100 * Return a pointer to the current RS block when called inside
101 * fec_for_each_buffer_rs_block.
103 static inline u8 *fec_buffer_rs_block(struct dm_verity *v,
104 struct dm_verity_fec_io *fio,
105 unsigned i, unsigned j)
107 return &fio->bufs[i][j * v->fec->rsn];
111 * Return an index to the current RS block when called inside
112 * fec_for_each_buffer_rs_block.
114 static inline unsigned fec_buffer_rs_index(unsigned i, unsigned j)
116 return (i << DM_VERITY_FEC_BUF_RS_BITS) + j;
/*
 * Decode all RS blocks from buffers and copy corrected bytes into fio->output
 * starting from block_offset.
 */
/*
 * Runs the Reed-Solomon decoder over every RS block held in fio's buffers and
 * stores one byte per RS block (the byte at byte_index) into fio->output,
 * beginning at index block_offset.
 *
 * NOTE(review): this listing omits lines of the function (the end of the
 * parameter list, some declarations, the error checks, the labels and the
 * return statements are not visible), so only visible statements are
 * annotated below.
 */
123 static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio,
124 u64 rsb, int byte_index, unsigned block_offset,
127 int r, corrected = 0, res;
128 struct dm_buffer *buf;
129 unsigned n, i, offset;
/* Fetch the parity block for this position; offset receives the byte offset of the parity data inside that block. */
132 par = fec_read_parity(v, rsb, block_offset, &offset, &buf);
137 * Decode the RS blocks we have in bufs. Each RS block results in
138 * one corrected target byte and consumes fec->roots parity bytes.
140 fec_for_each_buffer_rs_block(fio, n, i) {
141 block = fec_buffer_rs_block(v, fio, n, i);
/* The parity bytes for this RS block start at par[offset]. */
/*
 * NOTE(review): fec_decode_rs8() reads fec->roots bytes from &par[offset].
 * The refill test below only fires once offset >= fec->io_size, so if
 * io_size is not a multiple of fec->roots the last parity group of a block
 * could extend past the end of the buffer. Confirm against how io_size is
 * chosen in verity_fec_ctr().
 */
142 res = fec_decode_rs8(v, fio, block, &par[offset], neras);
/* Keep only the byte at byte_index of the decoded RS block. */
149 fio->output[block_offset] = block[byte_index];
/* Compare the output index against the data block size in bytes. */
152 if (block_offset >= 1 << v->data_dev_block_bits)
155 /* read the next block when we run out of parity bytes */
156 offset += v->fec->roots;
157 if (offset >= v->fec->io_size) {
/* Drop the exhausted parity buffer before reading the next one. */
158 dm_bufio_release(buf);
/* fec_read_parity() recomputes offset for the current block_offset. */
160 par = fec_read_parity(v, rsb, block_offset, &offset, &buf);
/* Release the parity buffer that is still held at this point. */
168 dm_bufio_release(buf);
/* r is logged either as an error code or as a count of corrected errors. */
171 DMERR_LIMIT("%s: FEC %llu: failed to correct: %d",
172 v->data_dev->name, (unsigned long long)rsb, r);
174 DMWARN_LIMIT("%s: FEC %llu: corrected %d errors",
175 v->data_dev->name, (unsigned long long)rsb, r);
181 * Locate data block erasures using verity hashes.
183 static int fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io,
184 u8 *want_digest, u8 *data)
186 if (unlikely(verity_hash(v, verity_io_hash_req(v, io),
187 data, 1 << v->data_dev_block_bits,
188 verity_io_real_digest(v, io))))
191 return memcmp(verity_io_real_digest(v, io), want_digest,
192 v->digest_size) != 0;
/*
 * Read data blocks that are part of the RS block and deinterleave as much as
 * fits into buffers. Check for erasure locations if @neras is non-NULL.
 */
/*
 * Reads the fec->rsn blocks that make up RS block rsb and copies their bytes,
 * starting at block_offset, into the RS-block buffers of the io. When neras
 * is non-NULL, blocks that fail to read or fail hash verification are
 * recorded in fio->erasures.
 *
 * NOTE(review): this listing omits lines of the function (the end of the
 * parameter list, several declarations, branch bodies, labels and the return
 * statement are not visible), so only visible statements are annotated below.
 */
199 static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io,
200 u64 rsb, u64 target, unsigned block_offset,
/* target_index stays -1 unless one of the interleaved offsets equals target. */
204 int i, j, target_index = -1;
205 struct dm_buffer *buf;
206 struct dm_bufio_client *bufio;
207 struct dm_verity_fec_io *fio = fec_io(io);
/* Stack buffer for the expected digest of the block being examined. */
210 u8 want_digest[HASH_MAX_DIGESTSIZE];
/* Guard: the configured digest must fit into want_digest. */
216 if (WARN_ON(v->digest_size > sizeof(want_digest)))
220 * read each of the rsn data blocks that are part of the RS block, and
221 * interleave contents to available bufs
223 for (i = 0; i < v->fec->rsn; i++) {
/* Byte offset of the i-th member of this RS block after interleaving. */
224 ileaved = fec_interleave(v, rsb * v->fec->rsn + i);
227 * target is the data block we want to correct, target_index is
228 * the index of this block within the rsn RS blocks
230 if (ileaved == target)
/* Convert the byte offset to a block number; start on the data device client. */
233 block = ileaved >> v->data_dev_block_bits;
234 bufio = v->fec->data_bufio;
/* Block numbers at or beyond data_blocks are rebased past the data area. */
236 if (block >= v->data_blocks) {
237 block -= v->data_blocks;
240 * blocks outside the area were assumed to contain
241 * zeros when encoding data was generated
243 if (unlikely(block >= v->fec->hash_blocks))
/* Translate to a block number relative to the start of the hash area. */
246 block += v->hash_start;
250 bbuf = dm_bufio_read(bufio, block, &buf);
252 DMWARN_LIMIT("%s: FEC %llu: read failed (%llu): %ld",
254 (unsigned long long)rsb,
255 (unsigned long long)block, PTR_ERR(bbuf));
257 /* assume the block is corrupted */
/*
 * NOTE(review): the bound is "<=", so up to fec->roots + 1 positions can be
 * recorded although the comment further down names fec->roots as the
 * maximum. Confirm this is intended and that fio->erasures has room for it.
 */
258 if (neras && *neras <= v->fec->roots)
259 fio->erasures[(*neras)++] = i;
264 /* locate erasures if the block is on the data device */
265 if (bufio == v->fec->data_bufio &&
266 verity_hash_for_block(v, io, block, want_digest,
268 /* skip known zero blocks entirely */
273 * skip if we have already found the theoretical
274 * maximum number (i.e. fec->roots) of erasures
276 if (neras && *neras <= v->fec->roots &&
277 fec_is_erasure(v, io, want_digest, bbuf))
278 fio->erasures[(*neras)++] = i;
282 * deinterleave and copy the bytes that fit into bufs,
283 * starting from block_offset
285 fec_for_each_buffer_rs_block(fio, n, j) {
/* k is the byte of the source block that feeds RS block (n, j). */
286 k = fec_buffer_rs_index(n, j) + block_offset;
/* Compare k against the data block size in bytes. */
288 if (k >= 1 << v->data_dev_block_bits)
/* Byte k of block i becomes symbol i of the corresponding RS block. */
291 rs_block = fec_buffer_rs_block(v, fio, n, j);
292 rs_block[i] = bbuf[k];
/* Release the block buffer obtained from dm_bufio_read() above. */
295 dm_bufio_release(buf);
/*
 * Allocate RS control structure and FEC buffers from preallocated mempools,
 * and attempt to allocate as many extra buffers as available.
 */
/*
 * Obtains the per-io resources used for decoding: the RS control structure,
 * the preallocated and extra RS-block buffers, and the output buffer.
 *
 * NOTE(review): this listing omits lines of the function (the declaration of
 * n, any guards in front of the allocations, the failure return, the
 * statement after the second loop and the final return are not visible).
 */
305 static int fec_alloc_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio)
/* The RS control structure is requested with GFP_NOIO. */
310 fio->rs = mempool_alloc(&v->fec->rs_pool, GFP_NOIO);
312 fec_for_each_prealloc_buffer(n) {
/* Preallocated slots use GFP_NOWAIT; a NULL result is logged as an error. */
316 fio->bufs[n] = mempool_alloc(&v->fec->prealloc_pool, GFP_NOWAIT);
317 if (unlikely(!fio->bufs[n])) {
318 DMERR("failed to allocate FEC buffer");
323 /* try to allocate the maximum number of buffers */
324 fec_for_each_extra_buffer(fio, n) {
/* Extra slots also use GFP_NOWAIT; here a NULL result is tolerated. */
328 fio->bufs[n] = mempool_alloc(&v->fec->extra_pool, GFP_NOWAIT);
329 /* we can manage with even one buffer if necessary */
330 if (unlikely(!fio->bufs[n]))
/* The output buffer is requested with GFP_NOIO. */
336 fio->output = mempool_alloc(&v->fec->output_pool, GFP_NOIO);
342 * Initialize buffers and clear erasures. fec_read_bufs() assumes buffers are
343 * zeroed before deinterleaving.
345 static void fec_init_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio)
349 fec_for_each_buffer(fio, n)
350 memset(fio->bufs[n], 0, v->fec->rsn << DM_VERITY_FEC_BUF_RS_BITS);
352 memset(fio->erasures, 0, sizeof(fio->erasures));
/*
 * Decode all RS blocks in a single data block and return the target block
 * (indicated by @offset) in fio->output. If @use_erasures is non-zero, uses
 * hashes to locate erasures.
 */
/*
 * Drives one full decode of the data block at offset: allocates buffers,
 * then repeatedly reads and decodes as many RS blocks as fit in the buffers
 * until the whole block is covered, and finally hashes fio->output and
 * compares it with the expected digest.
 *
 * NOTE(review): this listing omits lines of the function (the last
 * parameter, the declarations of r, neras and pos, the checks after each
 * call and the return statements are not visible).
 */
360 static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io,
361 struct dm_verity_fec_io *fio, u64 rsb, u64 offset,
367 r = fec_alloc_bufs(v, fio);
/* pos counts bytes of the output block; it is advanced at the end of the body. */
371 for (pos = 0; pos < 1 << v->data_dev_block_bits; ) {
/* Buffers and erasures are reset before every pass. */
372 fec_init_bufs(v, fio);
/* Erasure detection is only requested when use_erasures is set. */
374 r = fec_read_bufs(v, io, rsb, offset, pos,
375 use_erasures ? &neras : NULL);
/* The value returned by fec_read_bufs() is passed on as the byte index. */
379 r = fec_decode_bufs(v, fio, rsb, r, pos, neras);
/* One pass covers nbufs << DM_VERITY_FEC_BUF_RS_BITS RS blocks. */
383 pos += fio->nbufs << DM_VERITY_FEC_BUF_RS_BITS;
386 /* Always re-validate the corrected block against the expected hash */
387 r = verity_hash(v, verity_io_hash_req(v, io), fio->output,
388 1 << v->data_dev_block_bits,
389 verity_io_real_digest(v, io));
/* Compare the recomputed digest with the digest the io expects. */
393 if (memcmp(verity_io_real_digest(v, io), verity_io_want_digest(v, io),
395 DMERR_LIMIT("%s: FEC %llu: failed to correct (%d erasures)",
396 v->data_dev->name, (unsigned long long)rsb, neras);
403 static int fec_bv_copy(struct dm_verity *v, struct dm_verity_io *io, u8 *data,
406 struct dm_verity_fec_io *fio = fec_io(io);
408 memcpy(data, &fio->output[fio->output_pos], len);
409 fio->output_pos += len;
/*
 * Correct errors in a block. Copies corrected block to dest if non-NULL,
 * otherwise to a bio_vec starting from iter.
 */
/*
 * Entry point for error correction of one block. Computes the byte offset of
 * the block and the RS block it belongs to, attempts a decode without and
 * then with erasure location, and copies the result either to dest or into
 * the bio described by iter.
 *
 * NOTE(review): this listing omits lines of the function (the declaration
 * of r, early returns, level bookkeeping, the conditions between the two
 * decode attempts and around the copy, and the final return are not
 * visible).
 */
418 int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
419 enum verity_block_type type, sector_t block, u8 *dest,
420 struct bvec_iter *iter)
423 struct dm_verity_fec_io *fio = fec_io(io);
424 u64 offset, res, rsb;
426 if (!verity_fec_is_enabled(v))
/* Refuse to go deeper than DM_VERITY_FEC_MAX_RECURSION levels. */
429 if (fio->level >= DM_VERITY_FEC_MAX_RECURSION) {
430 DMWARN_LIMIT("%s: FEC: recursion too deep", v->data_dev->name);
/* Metadata block numbers are rebased so that they follow the data blocks. */
436 if (type == DM_VERITY_BLOCK_TYPE_METADATA)
437 block = block - v->hash_start + v->data_blocks;
440 * For RS(M, N), the continuous FEC data is divided into blocks of N
441 * bytes. Since block size may not be divisible by N, the last block
442 * is zero padded when decoding.
444 * Each byte of the block is covered by a different RS(M, N) code,
445 * and each code is interleaved over N blocks to make it less likely
446 * that bursty corruption will leave us in unrecoverable state.
/* offset is the byte offset of the block; res is offset divided by the byte size of fec->rounds blocks. */
449 offset = block << v->data_dev_block_bits;
450 res = div64_u64(offset, v->fec->rounds << v->data_dev_block_bits);
453 * The base RS block we can feed to the interleaver to find out all
454 * blocks required for decoding.
/* Equivalent to offset modulo the byte size of fec->rounds blocks. */
456 rsb = offset - res * (v->fec->rounds << v->data_dev_block_bits);
459 * Locating erasures is slow, so attempt to recover the block without
460 * them first. Do a second attempt with erasures if the corruption is
463 r = fec_decode_rsb(v, io, fio, rsb, offset, false);
465 r = fec_decode_rsb(v, io, fio, rsb, offset, true);
/* Copy one full data block out of fio->output. */
471 memcpy(dest, fio->output, 1 << v->data_dev_block_bits);
/* Otherwise hand the output to the bio segment by segment via fec_bv_copy(). */
474 r = verity_for_bv_block(v, io, iter, fec_bv_copy);
483 * Clean up per-bio data.
485 void verity_fec_finish_io(struct dm_verity_io *io)
488 struct dm_verity_fec *f = io->v->fec;
489 struct dm_verity_fec_io *fio = fec_io(io);
491 if (!verity_fec_is_enabled(io->v))
494 mempool_free(fio->rs, &f->rs_pool);
496 fec_for_each_prealloc_buffer(n)
497 mempool_free(fio->bufs[n], &f->prealloc_pool);
499 fec_for_each_extra_buffer(fio, n)
500 mempool_free(fio->bufs[n], &f->extra_pool);
502 mempool_free(fio->output, &f->output_pool);
/*
 * Initialize per-bio data.
 */
/*
 * Resets the FEC state embedded in io before the io is used.
 *
 * NOTE(review): this listing omits lines of the function (the statement
 * under the enabled check and any other field initialisations are not
 * visible); only the clearing of the buffer pointer array can be seen.
 */
508 void verity_fec_init_io(struct dm_verity_io *io)
510 struct dm_verity_fec_io *fio = fec_io(io);
512 if (!verity_fec_is_enabled(io->v))
/* Clear every buffer slot pointer. */
516 memset(fio->bufs, 0, sizeof(fio->bufs));
523 * Append feature arguments and values to the status table.
525 unsigned verity_fec_status_table(struct dm_verity *v, unsigned sz,
526 char *result, unsigned maxlen)
528 if (!verity_fec_is_enabled(v))
531 DMEMIT(" " DM_VERITY_OPT_FEC_DEV " %s "
532 DM_VERITY_OPT_FEC_BLOCKS " %llu "
533 DM_VERITY_OPT_FEC_START " %llu "
534 DM_VERITY_OPT_FEC_ROOTS " %d",
536 (unsigned long long)v->fec->blocks,
537 (unsigned long long)v->fec->start,
/*
 * Tears down the FEC state of a target: mempools, the buffer cache, both
 * bufio clients and the FEC device reference.
 *
 * NOTE(review): this listing omits lines of the function (the statement
 * under the enabled check, any guards in front of the destroy/put calls and
 * the end of the function are not visible).
 */
543 void verity_fec_dtr(struct dm_verity *v)
545 struct dm_verity_fec *f = v->fec;
547 if (!verity_fec_is_enabled(v))
/* Pools are torn down before the cache that is passed to the buffer pools in verity_fec_ctr(). */
550 mempool_exit(&f->rs_pool);
551 mempool_exit(&f->prealloc_pool);
552 mempool_exit(&f->extra_pool);
553 mempool_exit(&f->output_pool);
554 kmem_cache_destroy(f->cache);
/*
 * NOTE(review): verity_fec_ctr() returns PTR_ERR(f->data_bufio) or
 * PTR_ERR(f->bufio) while the field still holds the error pointer. Confirm
 * that the guards in front of these two calls (not visible here) reject
 * error pointers as well as NULL.
 */
557 dm_bufio_client_destroy(f->data_bufio);
559 dm_bufio_client_destroy(f->bufio);
/* Drop the reference on the FEC device. */
562 dm_put_device(v->ti, f->dev);
568 static void *fec_rs_alloc(gfp_t gfp_mask, void *pool_data)
570 struct dm_verity *v = (struct dm_verity *)pool_data;
572 return init_rs_gfp(8, 0x11d, 0, 1, v->fec->roots, gfp_mask);
/*
 * mempool free callback for RS control structures.
 *
 * @element is the struct rs_control to release; @pool_data is unused.
 */
static void fec_rs_free(void *element, void *pool_data)
{
	struct rs_control *rs = element;

	if (rs)
		free_rs(rs);
}
583 bool verity_is_fec_opt_arg(const char *arg_name)
585 return (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_DEV) ||
586 !strcasecmp(arg_name, DM_VERITY_OPT_FEC_BLOCKS) ||
587 !strcasecmp(arg_name, DM_VERITY_OPT_FEC_START) ||
588 !strcasecmp(arg_name, DM_VERITY_OPT_FEC_ROOTS));
/*
 * Parses one FEC feature keyword (arg_name) and its value, which is taken
 * from the argument set, and stores the result in v->fec. On invalid input
 * ti->error is set to a message naming the offending option.
 *
 * NOTE(review): this listing omits lines of the function (the declarations
 * of r, num_c and dummy, the argument-count check, the error returns and the
 * final return are not visible).
 */
591 int verity_fec_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v,
592 unsigned *argc, const char *arg_name)
595 struct dm_target *ti = v->ti;
596 const char *arg_value;
597 unsigned long long num_ll;
602 ti->error = "FEC feature arguments require a value";
/* Consume the value that follows the keyword. */
606 arg_value = dm_shift_arg(as);
609 if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_DEV)) {
/* The FEC device is opened for reading only. */
610 r = dm_get_device(ti, arg_value, FMODE_READ, &v->fec->dev);
612 ti->error = "FEC device lookup failed";
616 } else if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_BLOCKS)) {
/*
 * Accept only a plain number with no trailing character (sscanf must convert
 * exactly one item), and only if converting it from blocks to sectors and
 * back through sector_t yields the same value.
 */
617 if (sscanf(arg_value, "%llu%c", &num_ll, &dummy) != 1 ||
618 ((sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT))
619 >> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll)) {
620 ti->error = "Invalid " DM_VERITY_OPT_FEC_BLOCKS;
623 v->fec->blocks = num_ll;
625 } else if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_START)) {
/* Same validation as for the block count. */
626 if (sscanf(arg_value, "%llu%c", &num_ll, &dummy) != 1 ||
627 ((sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT)) >>
628 (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll)) {
629 ti->error = "Invalid " DM_VERITY_OPT_FEC_START;
632 v->fec->start = num_ll;
634 } else if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_ROOTS)) {
/*
 * Roots must be non-zero and lie between RSM - MAX_RSN and RSM - MIN_RSN
 * inclusive.
 */
635 if (sscanf(arg_value, "%hhu%c", &num_c, &dummy) != 1 || !num_c ||
636 num_c < (DM_VERITY_FEC_RSM - DM_VERITY_FEC_MAX_RSN) ||
637 num_c > (DM_VERITY_FEC_RSM - DM_VERITY_FEC_MIN_RSN)) {
638 ti->error = "Invalid " DM_VERITY_OPT_FEC_ROOTS;
641 v->fec->roots = num_c;
/* Any other keyword is rejected. */
644 ti->error = "Unrecognized verity FEC feature request";
652 * Allocate dm_verity_fec for v->fec. Must be called before verity_fec_ctr.
654 int verity_fec_ctr_alloc(struct dm_verity *v)
656 struct dm_verity_fec *f;
658 f = kzalloc(sizeof(struct dm_verity_fec), GFP_KERNEL);
660 v->ti->error = "Cannot allocate FEC structure";
/*
 * Validate arguments and preallocate memory. Must be called after arguments
 * have been parsed using verity_fec_parse_opt_args.
 */
/*
 * Second-stage constructor: checks the parsed FEC parameters against the
 * data and hash devices, derives rsn, rounds, hash_blocks and io_size,
 * creates the bufio clients for the FEC and data devices, sets up the
 * mempools and reserves per-bio space.
 *
 * NOTE(review): this listing omits lines of the function (the declaration
 * of ret, the conditions in front of several error messages, the error
 * returns and the end of the function are not visible); only visible
 * statements are annotated below.
 */
672 int verity_fec_ctr(struct dm_verity *v)
674 struct dm_verity_fec *f = v->fec;
675 struct dm_target *ti = v->ti;
676 u64 hash_blocks, fec_blocks;
679 if (!verity_fec_is_enabled(v)) {
685 * FEC is computed over data blocks, possible metadata, and
686 * hash blocks. In other words, FEC covers total of fec_blocks
687 * blocks consisting of the following:
689 * data blocks | hash blocks | metadata (optional)
691 * We allow metadata after hash blocks to support a use case
692 * where all data is stored on the same device and FEC covers
695 * If metadata is included, we require it to be available on the
696 * hash device after the hash blocks.
/* Number of hash blocks counted from hash_start. */
699 hash_blocks = v->hash_blocks - v->hash_start;
702 * Require matching block sizes for data and hash devices for
705 if (v->data_dev_block_bits != v->hash_dev_block_bits) {
706 ti->error = "Block sizes must match to use FEC";
711 ti->error = "Missing " DM_VERITY_OPT_FEC_ROOTS;
/* Data symbols per RS codeword: total codeword size minus the parity roots. */
714 f->rsn = DM_VERITY_FEC_RSM - f->roots;
717 ti->error = "Missing " DM_VERITY_OPT_FEC_BLOCKS;
/* sector_div() divides f->rounds in place and returns the remainder. */
721 f->rounds = f->blocks;
722 if (sector_div(f->rounds, f->rsn))
726 * Due to optional metadata, f->blocks can be larger than
727 * data_blocks and hash_blocks combined.
729 if (f->blocks < v->data_blocks + hash_blocks || !f->rounds) {
730 ti->error = "Invalid " DM_VERITY_OPT_FEC_BLOCKS;
735 * Metadata is accessed through the hash device, so we require
736 * it to be large enough.
738 f->hash_blocks = f->blocks - v->data_blocks;
739 if (dm_bufio_get_device_size(v->bufio) < f->hash_blocks) {
740 ti->error = "Hash device is too small for "
741 DM_VERITY_OPT_FEC_BLOCKS;
/*
 * Pick the parity I/O size: one data block when (roots << SECTOR_SHIFT) is
 * not a multiple of the data block size, otherwise roots << SECTOR_SHIFT.
 */
745 if ((f->roots << SECTOR_SHIFT) & ((1 << v->data_dev_block_bits) - 1))
746 f->io_size = 1 << v->data_dev_block_bits;
748 f->io_size = v->fec->roots << SECTOR_SHIFT;
/* NOTE(review): the block-size argument of this call is on a line not visible here; presumably f->io_size, confirm. */
750 f->bufio = dm_bufio_client_create(f->dev->bdev,
752 1, 0, NULL, NULL, 0);
753 if (IS_ERR(f->bufio)) {
754 ti->error = "Cannot initialize FEC bufio client";
/* f->bufio still holds the error pointer when this returns. */
755 return PTR_ERR(f->bufio);
/* The FEC start block is converted to sectors for the bufio offset. */
758 dm_bufio_set_sector_offset(f->bufio, f->start << (v->data_dev_block_bits - SECTOR_SHIFT));
/*
 * NOTE(review): f->roots appears in both dividend and divisor, so this
 * evaluates to f->rounds / 512 (barring overflow) and depends on neither the
 * data block size nor f->io_size. fec_read_parity() addresses parity blocks
 * as ((index + rsb) * roots) / io_size. Confirm that this minimum size is in
 * the same units as dm_bufio_get_device_size() reports and is not too
 * permissive.
 */
760 fec_blocks = div64_u64(f->rounds * f->roots, v->fec->roots << SECTOR_SHIFT);
761 if (dm_bufio_get_device_size(f->bufio) < fec_blocks) {
762 ti->error = "FEC device is too small";
/* A separate bufio client on the data device, using the data block size. */
766 f->data_bufio = dm_bufio_client_create(v->data_dev->bdev,
767 1 << v->data_dev_block_bits,
768 1, 0, NULL, NULL, 0);
769 if (IS_ERR(f->data_bufio)) {
770 ti->error = "Cannot initialize FEC data bufio client";
/* f->data_bufio still holds the error pointer when this returns. */
771 return PTR_ERR(f->data_bufio);
774 if (dm_bufio_get_device_size(f->data_bufio) < v->data_blocks) {
775 ti->error = "Data device is too small";
779 /* Preallocate an rs_control structure for each worker thread */
780 ret = mempool_init(&f->rs_pool, num_online_cpus(), fec_rs_alloc,
781 fec_rs_free, (void *) v);
783 ti->error = "Cannot allocate RS pool";
/* Slab cache for RS-block buffers; the visible size argument is rsn << DM_VERITY_FEC_BUF_RS_BITS. */
787 f->cache = kmem_cache_create("dm_verity_fec_buffers",
788 f->rsn << DM_VERITY_FEC_BUF_RS_BITS,
791 ti->error = "Cannot create FEC buffer cache";
795 /* Preallocate DM_VERITY_FEC_BUF_PREALLOC buffers for each thread */
796 ret = mempool_init_slab_pool(&f->prealloc_pool, num_online_cpus() *
797 DM_VERITY_FEC_BUF_PREALLOC,
800 ti->error = "Cannot allocate FEC buffer prealloc pool";
/* The extra pool is created with a minimum of 0 elements. */
804 ret = mempool_init_slab_pool(&f->extra_pool, 0, f->cache);
806 ti->error = "Cannot allocate FEC buffer extra pool";
810 /* Preallocate an output buffer for each thread */
811 ret = mempool_init_kmalloc_pool(&f->output_pool, num_online_cpus(),
812 1 << v->data_dev_block_bits);
814 ti->error = "Cannot allocate FEC output pool";
818 /* Reserve space for our per-bio data */
819 ti->per_io_data_size += sizeof(struct dm_verity_fec_io);