2 * AMD Cryptographic Coprocessor (CCP) driver
4 * Copyright (C) 2013 Advanced Micro Devices, Inc.
6 * Author: Tom Lendacky <thomas.lendacky@amd.com>
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
13 #include <linux/module.h>
14 #include <linux/kernel.h>
15 #include <linux/pci.h>
16 #include <linux/pci_ids.h>
17 #include <linux/kthread.h>
18 #include <linux/sched.h>
19 #include <linux/interrupt.h>
20 #include <linux/spinlock.h>
21 #include <linux/mutex.h>
22 #include <linux/delay.h>
23 #include <linux/ccp.h>
24 #include <linux/scatterlist.h>
25 #include <crypto/scatterwalk.h>
31 CCP_MEMTYPE_SYSTEM = 0,
41 enum dma_data_direction dir;
44 struct ccp_dm_workarea {
46 struct dma_pool *dma_pool;
50 struct ccp_dma_info dma;
53 struct ccp_sg_workarea {
54 struct scatterlist *sg;
58 struct scatterlist *dma_sg;
59 struct device *dma_dev;
60 unsigned int dma_count;
61 enum dma_data_direction dma_dir;
69 struct ccp_sg_workarea sg_wa;
70 struct ccp_dm_workarea dm_wa;
74 enum ccp_memtype type;
76 struct ccp_dma_info dma;
82 enum ccp_aes_type type;
83 enum ccp_aes_mode mode;
84 enum ccp_aes_action action;
87 struct ccp_xts_aes_op {
88 enum ccp_aes_action action;
89 enum ccp_xts_aes_unit_size unit_size;
93 enum ccp_sha_type type;
102 struct ccp_passthru_op {
103 enum ccp_passthru_bitwise bit_mod;
104 enum ccp_passthru_byteswap byte_swap;
108 enum ccp_ecc_function function;
112 struct ccp_cmd_queue *cmd_q;
126 struct ccp_aes_op aes;
127 struct ccp_xts_aes_op xts;
128 struct ccp_sha_op sha;
129 struct ccp_rsa_op rsa;
130 struct ccp_passthru_op passthru;
131 struct ccp_ecc_op ecc;
135 /* The CCP cannot perform zero-length sha operations so the caller
136 * is required to buffer data for the final operation. However, a
137 * sha operation for a message with a total length of zero is valid
138 * so known values are required to supply the result.
140 static const u8 ccp_sha1_zero[CCP_SHA_CTXSIZE] = {
141 0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d,
142 0x32, 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90,
143 0xaf, 0xd8, 0x07, 0x09, 0x00, 0x00, 0x00, 0x00,
144 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
147 static const u8 ccp_sha224_zero[CCP_SHA_CTXSIZE] = {
148 0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9,
149 0x47, 0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4,
150 0x15, 0xa2, 0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a,
151 0xc5, 0xb3, 0xe4, 0x2f, 0x00, 0x00, 0x00, 0x00,
154 static const u8 ccp_sha256_zero[CCP_SHA_CTXSIZE] = {
155 0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14,
156 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24,
157 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c,
158 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55,
161 static u32 ccp_addr_lo(struct ccp_dma_info *info)
163 return lower_32_bits(info->address + info->offset);
166 static u32 ccp_addr_hi(struct ccp_dma_info *info)
168 return upper_32_bits(info->address + info->offset) & 0x0000ffff;
171 static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
173 struct ccp_cmd_queue *cmd_q = op->cmd_q;
174 struct ccp_device *ccp = cmd_q->ccp;
175 void __iomem *cr_addr;
180 /* We could read a status register to see how many free slots
181 * are actually available, but reading that register resets it
182 * and you could lose some error information.
186 cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT)
187 | (op->jobid << REQ0_JOBID_SHIFT)
188 | REQ0_WAIT_FOR_WRITE;
191 cr0 |= REQ0_STOP_ON_COMPLETE
192 | REQ0_INT_ON_COMPLETE;
194 if (op->ioc || !cmd_q->free_slots)
195 cr0 |= REQ0_INT_ON_COMPLETE;
197 /* Start at CMD_REQ1 */
198 cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR;
200 mutex_lock(&ccp->req_mutex);
202 /* Write CMD_REQ1 through CMD_REQx first */
203 for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR)
204 iowrite32(*(cr + i), cr_addr);
206 /* Tell the CCP to start */
208 iowrite32(cr0, ccp->io_regs + CMD_REQ0);
210 mutex_unlock(&ccp->req_mutex);
212 if (cr0 & REQ0_INT_ON_COMPLETE) {
213 /* Wait for the job to complete */
214 ret = wait_event_interruptible(cmd_q->int_queue,
216 if (ret || cmd_q->cmd_error) {
217 /* On error delete all related jobs from the queue */
218 cmd = (cmd_q->id << DEL_Q_ID_SHIFT)
221 iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
225 } else if (op->soc) {
226 /* Delete just head job from the queue on SoC */
228 | (cmd_q->id << DEL_Q_ID_SHIFT)
231 iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
234 cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status);
242 static int ccp_perform_aes(struct ccp_op *op)
246 /* Fill out the register contents for REQ1 through REQ6 */
247 cr[0] = (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT)
248 | (op->u.aes.type << REQ1_AES_TYPE_SHIFT)
249 | (op->u.aes.mode << REQ1_AES_MODE_SHIFT)
250 | (op->u.aes.action << REQ1_AES_ACTION_SHIFT)
251 | (op->ksb_key << REQ1_KEY_KSB_SHIFT);
252 cr[1] = op->src.u.dma.length - 1;
253 cr[2] = ccp_addr_lo(&op->src.u.dma);
254 cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
255 | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
256 | ccp_addr_hi(&op->src.u.dma);
257 cr[4] = ccp_addr_lo(&op->dst.u.dma);
258 cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
259 | ccp_addr_hi(&op->dst.u.dma);
261 if (op->u.aes.mode == CCP_AES_MODE_CFB)
262 cr[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT);
270 return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
273 static int ccp_perform_xts_aes(struct ccp_op *op)
277 /* Fill out the register contents for REQ1 through REQ6 */
278 cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT)
279 | (op->u.xts.action << REQ1_AES_ACTION_SHIFT)
280 | (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT)
281 | (op->ksb_key << REQ1_KEY_KSB_SHIFT);
282 cr[1] = op->src.u.dma.length - 1;
283 cr[2] = ccp_addr_lo(&op->src.u.dma);
284 cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
285 | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
286 | ccp_addr_hi(&op->src.u.dma);
287 cr[4] = ccp_addr_lo(&op->dst.u.dma);
288 cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
289 | ccp_addr_hi(&op->dst.u.dma);
297 return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
300 static int ccp_perform_sha(struct ccp_op *op)
304 /* Fill out the register contents for REQ1 through REQ6 */
305 cr[0] = (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT)
306 | (op->u.sha.type << REQ1_SHA_TYPE_SHIFT)
308 cr[1] = op->src.u.dma.length - 1;
309 cr[2] = ccp_addr_lo(&op->src.u.dma);
310 cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
311 | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
312 | ccp_addr_hi(&op->src.u.dma);
316 cr[4] = lower_32_bits(op->u.sha.msg_bits);
317 cr[5] = upper_32_bits(op->u.sha.msg_bits);
323 return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
326 static int ccp_perform_rsa(struct ccp_op *op)
330 /* Fill out the register contents for REQ1 through REQ6 */
331 cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT)
332 | (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT)
333 | (op->ksb_key << REQ1_KEY_KSB_SHIFT)
335 cr[1] = op->u.rsa.input_len - 1;
336 cr[2] = ccp_addr_lo(&op->src.u.dma);
337 cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
338 | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
339 | ccp_addr_hi(&op->src.u.dma);
340 cr[4] = ccp_addr_lo(&op->dst.u.dma);
341 cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
342 | ccp_addr_hi(&op->dst.u.dma);
344 return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
347 static int ccp_perform_passthru(struct ccp_op *op)
351 /* Fill out the register contents for REQ1 through REQ6 */
352 cr[0] = (CCP_ENGINE_PASSTHRU << REQ1_ENGINE_SHIFT)
353 | (op->u.passthru.bit_mod << REQ1_PT_BW_SHIFT)
354 | (op->u.passthru.byte_swap << REQ1_PT_BS_SHIFT);
356 if (op->src.type == CCP_MEMTYPE_SYSTEM)
357 cr[1] = op->src.u.dma.length - 1;
359 cr[1] = op->dst.u.dma.length - 1;
361 if (op->src.type == CCP_MEMTYPE_SYSTEM) {
362 cr[2] = ccp_addr_lo(&op->src.u.dma);
363 cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
364 | ccp_addr_hi(&op->src.u.dma);
366 if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
367 cr[3] |= (op->ksb_key << REQ4_KSB_SHIFT);
369 cr[2] = op->src.u.ksb * CCP_KSB_BYTES;
370 cr[3] = (CCP_MEMTYPE_KSB << REQ4_MEMTYPE_SHIFT);
373 if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
374 cr[4] = ccp_addr_lo(&op->dst.u.dma);
375 cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
376 | ccp_addr_hi(&op->dst.u.dma);
378 cr[4] = op->dst.u.ksb * CCP_KSB_BYTES;
379 cr[5] = (CCP_MEMTYPE_KSB << REQ6_MEMTYPE_SHIFT);
385 return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
388 static int ccp_perform_ecc(struct ccp_op *op)
392 /* Fill out the register contents for REQ1 through REQ6 */
393 cr[0] = REQ1_ECC_AFFINE_CONVERT
394 | (CCP_ENGINE_ECC << REQ1_ENGINE_SHIFT)
395 | (op->u.ecc.function << REQ1_ECC_FUNCTION_SHIFT)
397 cr[1] = op->src.u.dma.length - 1;
398 cr[2] = ccp_addr_lo(&op->src.u.dma);
399 cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
400 | ccp_addr_hi(&op->src.u.dma);
401 cr[4] = ccp_addr_lo(&op->dst.u.dma);
402 cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
403 | ccp_addr_hi(&op->dst.u.dma);
405 return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
408 static u32 ccp_alloc_ksb(struct ccp_device *ccp, unsigned int count)
413 mutex_lock(&ccp->ksb_mutex);
415 start = (u32)bitmap_find_next_zero_area(ccp->ksb,
419 if (start <= ccp->ksb_count) {
420 bitmap_set(ccp->ksb, start, count);
422 mutex_unlock(&ccp->ksb_mutex);
428 mutex_unlock(&ccp->ksb_mutex);
430 /* Wait for KSB entries to become available */
431 if (wait_event_interruptible(ccp->ksb_queue, ccp->ksb_avail))
435 return KSB_START + start;
438 static void ccp_free_ksb(struct ccp_device *ccp, unsigned int start,
444 mutex_lock(&ccp->ksb_mutex);
446 bitmap_clear(ccp->ksb, start - KSB_START, count);
450 mutex_unlock(&ccp->ksb_mutex);
452 wake_up_interruptible_all(&ccp->ksb_queue);
455 static u32 ccp_gen_jobid(struct ccp_device *ccp)
457 return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK;
460 static void ccp_sg_free(struct ccp_sg_workarea *wa)
463 dma_unmap_sg(wa->dma_dev, wa->dma_sg, wa->nents, wa->dma_dir);
468 static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
469 struct scatterlist *sg, u64 len,
470 enum dma_data_direction dma_dir)
472 memset(wa, 0, sizeof(*wa));
478 wa->nents = sg_nents(sg);
479 wa->length = sg->length;
480 wa->bytes_left = len;
486 if (dma_dir == DMA_NONE)
491 wa->dma_dir = dma_dir;
492 wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
500 static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
502 unsigned int nbytes = min_t(u64, len, wa->bytes_left);
507 wa->sg_used += nbytes;
508 wa->bytes_left -= nbytes;
509 if (wa->sg_used == wa->sg->length) {
510 wa->sg = sg_next(wa->sg);
515 static void ccp_dm_free(struct ccp_dm_workarea *wa)
517 if (wa->length <= CCP_DMAPOOL_MAX_SIZE) {
519 dma_pool_free(wa->dma_pool, wa->address,
523 dma_unmap_single(wa->dev, wa->dma.address, wa->length,
532 static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
533 struct ccp_cmd_queue *cmd_q,
535 enum dma_data_direction dir)
537 memset(wa, 0, sizeof(*wa));
542 wa->dev = cmd_q->ccp->dev;
545 if (len <= CCP_DMAPOOL_MAX_SIZE) {
546 wa->dma_pool = cmd_q->dma_pool;
548 wa->address = dma_pool_alloc(wa->dma_pool, GFP_KERNEL,
553 wa->dma.length = CCP_DMAPOOL_MAX_SIZE;
555 memset(wa->address, 0, CCP_DMAPOOL_MAX_SIZE);
557 wa->address = kzalloc(len, GFP_KERNEL);
561 wa->dma.address = dma_map_single(wa->dev, wa->address, len,
563 if (!wa->dma.address)
566 wa->dma.length = len;
573 static void ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
574 struct scatterlist *sg, unsigned int sg_offset,
577 WARN_ON(!wa->address);
579 scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
583 static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
584 struct scatterlist *sg, unsigned int sg_offset,
587 WARN_ON(!wa->address);
589 scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
593 static void ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
594 struct scatterlist *sg,
595 unsigned int len, unsigned int se_len,
598 unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
599 u8 buffer[CCP_REVERSE_BUF_SIZE];
601 BUG_ON(se_len > sizeof(buffer));
607 ksb_len = min_t(unsigned int, nbytes, se_len);
608 sg_offset -= ksb_len;
610 scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 0);
611 for (i = 0; i < ksb_len; i++)
612 wa->address[dm_offset + i] = buffer[ksb_len - i - 1];
614 dm_offset += ksb_len;
617 if ((ksb_len != se_len) && sign_extend) {
618 /* Must sign-extend to nearest sign-extend length */
619 if (wa->address[dm_offset - 1] & 0x80)
620 memset(wa->address + dm_offset, 0xff,
626 static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
627 struct scatterlist *sg,
630 unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
631 u8 buffer[CCP_REVERSE_BUF_SIZE];
637 ksb_len = min_t(unsigned int, nbytes, sizeof(buffer));
638 dm_offset -= ksb_len;
640 for (i = 0; i < ksb_len; i++)
641 buffer[ksb_len - i - 1] = wa->address[dm_offset + i];
642 scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 1);
644 sg_offset += ksb_len;
649 static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q)
651 ccp_dm_free(&data->dm_wa);
652 ccp_sg_free(&data->sg_wa);
655 static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q,
656 struct scatterlist *sg, u64 sg_len,
658 enum dma_data_direction dir)
662 memset(data, 0, sizeof(*data));
664 ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len,
669 ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir);
676 ccp_free_data(data, cmd_q);
681 static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
683 struct ccp_sg_workarea *sg_wa = &data->sg_wa;
684 struct ccp_dm_workarea *dm_wa = &data->dm_wa;
685 unsigned int buf_count, nbytes;
687 /* Clear the buffer if setting it */
689 memset(dm_wa->address, 0, dm_wa->length);
694 /* Perform the copy operation
695 * nbytes will always be <= UINT_MAX because dm_wa->length is
698 nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length);
699 scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used,
702 /* Update the structures and generate the count */
704 while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
705 nbytes = min(sg_wa->sg->length - sg_wa->sg_used,
706 dm_wa->length - buf_count);
707 nbytes = min_t(u64, sg_wa->bytes_left, nbytes);
710 ccp_update_sg_workarea(sg_wa, nbytes);
716 static unsigned int ccp_fill_queue_buf(struct ccp_data *data)
718 return ccp_queue_buf(data, 0);
721 static unsigned int ccp_empty_queue_buf(struct ccp_data *data)
723 return ccp_queue_buf(data, 1);
726 static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
727 struct ccp_op *op, unsigned int block_size,
730 unsigned int sg_src_len, sg_dst_len, op_len;
732 /* The CCP can only DMA from/to one address each per operation. This
733 * requires that we find the smallest DMA area between the source
734 * and destination. The resulting len values will always be <= UINT_MAX
735 * because the dma length is an unsigned int.
737 sg_src_len = sg_dma_len(src->sg_wa.sg) - src->sg_wa.sg_used;
738 sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len);
741 sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used;
742 sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
743 op_len = min(sg_src_len, sg_dst_len);
747 /* The data operation length will be at least block_size in length
748 * or the smaller of available sg room remaining for the source or
751 op_len = max(op_len, block_size);
753 /* Unless we have to buffer data, there's no reason to wait */
756 if (sg_src_len < block_size) {
757 /* Not enough data in the sg element, so it
758 * needs to be buffered into a blocksize chunk
760 int cp_len = ccp_fill_queue_buf(src);
763 op->src.u.dma.address = src->dm_wa.dma.address;
764 op->src.u.dma.offset = 0;
765 op->src.u.dma.length = (blocksize_op) ? block_size : cp_len;
767 /* Enough data in the sg element, but we need to
768 * adjust for any previously copied data
770 op->src.u.dma.address = sg_dma_address(src->sg_wa.sg);
771 op->src.u.dma.offset = src->sg_wa.sg_used;
772 op->src.u.dma.length = op_len & ~(block_size - 1);
774 ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length);
778 if (sg_dst_len < block_size) {
779 /* Not enough room in the sg element or we're on the
780 * last piece of data (when using padding), so the
781 * output needs to be buffered into a blocksize chunk
784 op->dst.u.dma.address = dst->dm_wa.dma.address;
785 op->dst.u.dma.offset = 0;
786 op->dst.u.dma.length = op->src.u.dma.length;
788 /* Enough room in the sg element, but we need to
789 * adjust for any previously used area
791 op->dst.u.dma.address = sg_dma_address(dst->sg_wa.sg);
792 op->dst.u.dma.offset = dst->sg_wa.sg_used;
793 op->dst.u.dma.length = op->src.u.dma.length;
798 static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
804 if (op->dst.u.dma.address == dst->dm_wa.dma.address)
805 ccp_empty_queue_buf(dst);
807 ccp_update_sg_workarea(&dst->sg_wa,
808 op->dst.u.dma.length);
812 static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q,
813 struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
814 u32 byte_swap, bool from)
818 memset(&op, 0, sizeof(op));
826 op.src.type = CCP_MEMTYPE_KSB;
828 op.dst.type = CCP_MEMTYPE_SYSTEM;
829 op.dst.u.dma.address = wa->dma.address;
830 op.dst.u.dma.length = wa->length;
832 op.src.type = CCP_MEMTYPE_SYSTEM;
833 op.src.u.dma.address = wa->dma.address;
834 op.src.u.dma.length = wa->length;
835 op.dst.type = CCP_MEMTYPE_KSB;
839 op.u.passthru.byte_swap = byte_swap;
841 return ccp_perform_passthru(&op);
844 static int ccp_copy_to_ksb(struct ccp_cmd_queue *cmd_q,
845 struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
848 return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, false);
851 static int ccp_copy_from_ksb(struct ccp_cmd_queue *cmd_q,
852 struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
855 return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, true);
858 static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
861 struct ccp_aes_engine *aes = &cmd->u.aes;
862 struct ccp_dm_workarea key, ctx;
865 unsigned int dm_offset;
868 if (!((aes->key_len == AES_KEYSIZE_128) ||
869 (aes->key_len == AES_KEYSIZE_192) ||
870 (aes->key_len == AES_KEYSIZE_256)))
873 if (aes->src_len & (AES_BLOCK_SIZE - 1))
876 if (aes->iv_len != AES_BLOCK_SIZE)
879 if (!aes->key || !aes->iv || !aes->src)
882 if (aes->cmac_final) {
883 if (aes->cmac_key_len != AES_BLOCK_SIZE)
890 BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
891 BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
894 memset(&op, 0, sizeof(op));
896 op.jobid = ccp_gen_jobid(cmd_q->ccp);
897 op.ksb_key = cmd_q->ksb_key;
898 op.ksb_ctx = cmd_q->ksb_ctx;
900 op.u.aes.type = aes->type;
901 op.u.aes.mode = aes->mode;
902 op.u.aes.action = aes->action;
904 /* All supported key sizes fit in a single (32-byte) KSB entry
905 * and must be in little endian format. Use the 256-bit byte
906 * swap passthru option to convert from big endian to little
909 ret = ccp_init_dm_workarea(&key, cmd_q,
910 CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
915 dm_offset = CCP_KSB_BYTES - aes->key_len;
916 ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
917 ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
918 CCP_PASSTHRU_BYTESWAP_256BIT);
920 cmd->engine_error = cmd_q->cmd_error;
924 /* The AES context fits in a single (32-byte) KSB entry and
925 * must be in little endian format. Use the 256-bit byte swap
926 * passthru option to convert from big endian to little endian.
928 ret = ccp_init_dm_workarea(&ctx, cmd_q,
929 CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
934 dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
935 ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
936 ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
937 CCP_PASSTHRU_BYTESWAP_256BIT);
939 cmd->engine_error = cmd_q->cmd_error;
943 /* Send data to the CCP AES engine */
944 ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
945 AES_BLOCK_SIZE, DMA_TO_DEVICE);
949 while (src.sg_wa.bytes_left) {
950 ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true);
951 if (aes->cmac_final && !src.sg_wa.bytes_left) {
954 /* Push the K1/K2 key to the CCP now */
955 ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid,
957 CCP_PASSTHRU_BYTESWAP_256BIT);
959 cmd->engine_error = cmd_q->cmd_error;
963 ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
965 ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
966 CCP_PASSTHRU_BYTESWAP_256BIT);
968 cmd->engine_error = cmd_q->cmd_error;
973 ret = ccp_perform_aes(&op);
975 cmd->engine_error = cmd_q->cmd_error;
979 ccp_process_data(&src, NULL, &op);
982 /* Retrieve the AES context - convert from LE to BE using
983 * 32-byte (256-bit) byteswapping
985 ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
986 CCP_PASSTHRU_BYTESWAP_256BIT);
988 cmd->engine_error = cmd_q->cmd_error;
992 /* ...but we only need AES_BLOCK_SIZE bytes */
993 dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
994 ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
997 ccp_free_data(&src, cmd_q);
1008 static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1010 struct ccp_aes_engine *aes = &cmd->u.aes;
1011 struct ccp_dm_workarea key, ctx;
1012 struct ccp_data src, dst;
1014 unsigned int dm_offset;
1015 bool in_place = false;
1018 if (aes->mode == CCP_AES_MODE_CMAC)
1019 return ccp_run_aes_cmac_cmd(cmd_q, cmd);
1021 if (!((aes->key_len == AES_KEYSIZE_128) ||
1022 (aes->key_len == AES_KEYSIZE_192) ||
1023 (aes->key_len == AES_KEYSIZE_256)))
1026 if (((aes->mode == CCP_AES_MODE_ECB) ||
1027 (aes->mode == CCP_AES_MODE_CBC) ||
1028 (aes->mode == CCP_AES_MODE_CFB)) &&
1029 (aes->src_len & (AES_BLOCK_SIZE - 1)))
1032 if (!aes->key || !aes->src || !aes->dst)
1035 if (aes->mode != CCP_AES_MODE_ECB) {
1036 if (aes->iv_len != AES_BLOCK_SIZE)
1043 BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
1044 BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
1047 memset(&op, 0, sizeof(op));
1049 op.jobid = ccp_gen_jobid(cmd_q->ccp);
1050 op.ksb_key = cmd_q->ksb_key;
1051 op.ksb_ctx = cmd_q->ksb_ctx;
1052 op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
1053 op.u.aes.type = aes->type;
1054 op.u.aes.mode = aes->mode;
1055 op.u.aes.action = aes->action;
1057 /* All supported key sizes fit in a single (32-byte) KSB entry
1058 * and must be in little endian format. Use the 256-bit byte
1059 * swap passthru option to convert from big endian to little
1062 ret = ccp_init_dm_workarea(&key, cmd_q,
1063 CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
1068 dm_offset = CCP_KSB_BYTES - aes->key_len;
1069 ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
1070 ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
1071 CCP_PASSTHRU_BYTESWAP_256BIT);
1073 cmd->engine_error = cmd_q->cmd_error;
1077 /* The AES context fits in a single (32-byte) KSB entry and
1078 * must be in little endian format. Use the 256-bit byte swap
1079 * passthru option to convert from big endian to little endian.
1081 ret = ccp_init_dm_workarea(&ctx, cmd_q,
1082 CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
1087 if (aes->mode != CCP_AES_MODE_ECB) {
1088 /* Load the AES context - convert to LE */
1089 dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1090 ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1091 ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1092 CCP_PASSTHRU_BYTESWAP_256BIT);
1094 cmd->engine_error = cmd_q->cmd_error;
1099 /* Prepare the input and output data workareas. For in-place
1100 * operations we need to set the dma direction to BIDIRECTIONAL
1101 * and copy the src workarea to the dst workarea.
1103 if (sg_virt(aes->src) == sg_virt(aes->dst))
1106 ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
1108 in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1115 ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
1116 AES_BLOCK_SIZE, DMA_FROM_DEVICE);
1121 /* Send data to the CCP AES engine */
1122 while (src.sg_wa.bytes_left) {
1123 ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
1124 if (!src.sg_wa.bytes_left) {
1127 /* Since we don't retrieve the AES context in ECB
1128 * mode we have to wait for the operation to complete
1129 * on the last piece of data
1131 if (aes->mode == CCP_AES_MODE_ECB)
1135 ret = ccp_perform_aes(&op);
1137 cmd->engine_error = cmd_q->cmd_error;
1141 ccp_process_data(&src, &dst, &op);
1144 if (aes->mode != CCP_AES_MODE_ECB) {
1145 /* Retrieve the AES context - convert from LE to BE using
1146 * 32-byte (256-bit) byteswapping
1148 ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1149 CCP_PASSTHRU_BYTESWAP_256BIT);
1151 cmd->engine_error = cmd_q->cmd_error;
1155 /* ...but we only need AES_BLOCK_SIZE bytes */
1156 dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1157 ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1162 ccp_free_data(&dst, cmd_q);
1165 ccp_free_data(&src, cmd_q);
1176 static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
1177 struct ccp_cmd *cmd)
1179 struct ccp_xts_aes_engine *xts = &cmd->u.xts;
1180 struct ccp_dm_workarea key, ctx;
1181 struct ccp_data src, dst;
1183 unsigned int unit_size, dm_offset;
1184 bool in_place = false;
1187 switch (xts->unit_size) {
1188 case CCP_XTS_AES_UNIT_SIZE_16:
1191 case CCP_XTS_AES_UNIT_SIZE_512:
1194 case CCP_XTS_AES_UNIT_SIZE_1024:
1197 case CCP_XTS_AES_UNIT_SIZE_2048:
1200 case CCP_XTS_AES_UNIT_SIZE_4096:
1208 if (xts->key_len != AES_KEYSIZE_128)
1211 if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
1214 if (xts->iv_len != AES_BLOCK_SIZE)
1217 if (!xts->key || !xts->iv || !xts->src || !xts->dst)
1220 BUILD_BUG_ON(CCP_XTS_AES_KEY_KSB_COUNT != 1);
1221 BUILD_BUG_ON(CCP_XTS_AES_CTX_KSB_COUNT != 1);
1224 memset(&op, 0, sizeof(op));
1226 op.jobid = ccp_gen_jobid(cmd_q->ccp);
1227 op.ksb_key = cmd_q->ksb_key;
1228 op.ksb_ctx = cmd_q->ksb_ctx;
1230 op.u.xts.action = xts->action;
1231 op.u.xts.unit_size = xts->unit_size;
1233 /* All supported key sizes fit in a single (32-byte) KSB entry
1234 * and must be in little endian format. Use the 256-bit byte
1235 * swap passthru option to convert from big endian to little
1238 ret = ccp_init_dm_workarea(&key, cmd_q,
1239 CCP_XTS_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
1244 dm_offset = CCP_KSB_BYTES - AES_KEYSIZE_128;
1245 ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
1246 ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len);
1247 ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
1248 CCP_PASSTHRU_BYTESWAP_256BIT);
1250 cmd->engine_error = cmd_q->cmd_error;
1254 /* The AES context fits in a single (32-byte) KSB entry and
1255 * for XTS is already in little endian format so no byte swapping
1258 ret = ccp_init_dm_workarea(&ctx, cmd_q,
1259 CCP_XTS_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
1264 ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
1265 ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1266 CCP_PASSTHRU_BYTESWAP_NOOP);
1268 cmd->engine_error = cmd_q->cmd_error;
1272 /* Prepare the input and output data workareas. For in-place
1273 * operations we need to set the dma direction to BIDIRECTIONAL
1274 * and copy the src workarea to the dst workarea.
1276 if (sg_virt(xts->src) == sg_virt(xts->dst))
1279 ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len,
1281 in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1288 ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
1289 unit_size, DMA_FROM_DEVICE);
1294 /* Send data to the CCP AES engine */
1295 while (src.sg_wa.bytes_left) {
1296 ccp_prepare_data(&src, &dst, &op, unit_size, true);
1297 if (!src.sg_wa.bytes_left)
1300 ret = ccp_perform_xts_aes(&op);
1302 cmd->engine_error = cmd_q->cmd_error;
1306 ccp_process_data(&src, &dst, &op);
1309 /* Retrieve the AES context - convert from LE to BE using
1310 * 32-byte (256-bit) byteswapping
1312 ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1313 CCP_PASSTHRU_BYTESWAP_256BIT);
1315 cmd->engine_error = cmd_q->cmd_error;
1319 /* ...but we only need AES_BLOCK_SIZE bytes */
1320 dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1321 ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);
1325 ccp_free_data(&dst, cmd_q);
1328 ccp_free_data(&src, cmd_q);
1339 static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1341 struct ccp_sha_engine *sha = &cmd->u.sha;
1342 struct ccp_dm_workarea ctx;
1343 struct ccp_data src;
1347 if (sha->ctx_len != CCP_SHA_CTXSIZE)
1353 if (!sha->final && (sha->src_len & (CCP_SHA_BLOCKSIZE - 1)))
1356 if (!sha->src_len) {
1359 /* Not final, just return */
1363 /* CCP can't do a zero length sha operation so the caller
1364 * must buffer the data.
1369 /* A sha operation for a message with a total length of zero,
1370 * return known result.
1372 switch (sha->type) {
1373 case CCP_SHA_TYPE_1:
1374 sha_zero = ccp_sha1_zero;
1376 case CCP_SHA_TYPE_224:
1377 sha_zero = ccp_sha224_zero;
1379 case CCP_SHA_TYPE_256:
1380 sha_zero = ccp_sha256_zero;
1386 scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
1395 BUILD_BUG_ON(CCP_SHA_KSB_COUNT != 1);
1397 memset(&op, 0, sizeof(op));
1399 op.jobid = ccp_gen_jobid(cmd_q->ccp);
1400 op.ksb_ctx = cmd_q->ksb_ctx;
1401 op.u.sha.type = sha->type;
1402 op.u.sha.msg_bits = sha->msg_bits;
1404 /* The SHA context fits in a single (32-byte) KSB entry and
1405 * must be in little endian format. Use the 256-bit byte swap
1406 * passthru option to convert from big endian to little endian.
1408 ret = ccp_init_dm_workarea(&ctx, cmd_q,
1409 CCP_SHA_KSB_COUNT * CCP_KSB_BYTES,
1414 ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
1415 ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1416 CCP_PASSTHRU_BYTESWAP_256BIT);
1418 cmd->engine_error = cmd_q->cmd_error;
1422 /* Send data to the CCP SHA engine */
1423 ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
1424 CCP_SHA_BLOCKSIZE, DMA_TO_DEVICE);
1428 while (src.sg_wa.bytes_left) {
1429 ccp_prepare_data(&src, NULL, &op, CCP_SHA_BLOCKSIZE, false);
1430 if (sha->final && !src.sg_wa.bytes_left)
1433 ret = ccp_perform_sha(&op);
1435 cmd->engine_error = cmd_q->cmd_error;
1439 ccp_process_data(&src, NULL, &op);
1442 /* Retrieve the SHA context - convert from LE to BE using
1443 * 32-byte (256-bit) byteswapping
1445 ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1446 CCP_PASSTHRU_BYTESWAP_256BIT);
1448 cmd->engine_error = cmd_q->cmd_error;
1452 ccp_get_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
1455 ccp_free_data(&src, cmd_q);
/* ccp_run_rsa_cmd - run one RSA request on a CCP command queue
 *
 * @cmd_q: command queue; cmd_q->ccp is used to generate the job id and to
 *         allocate/free KSB entries, and cmd_q->cmd_error is copied into
 *         cmd->engine_error after the KSB copy and after the RSA operation
 *         (presumably only on failure - the guarding tests are not part of
 *         this listing)
 * @cmd:   request; the RSA parameters are read from cmd->u.rsa
 *
 * Visible sequence: check key_size against CCP_RSA_MAX_WIDTH and check that
 * exp, mod, src and dst are all set; derive the output length o_len and the
 * number of KSB entries it needs; load the exponent into the KSB; build one
 * input workarea holding the modulus followed, o_len bytes later, by the
 * message; run ccp_perform_rsa(); copy mod_len bytes of the result into
 * rsa->dst; release the output data and the KSB entries.
 *
 * NOTE(review): several source lines are absent from this listing (return
 * statements, error labels, the i_len assignment, the last argument of the
 * ccp_reverse_set_dm_area() calls) - confirm details against the full file.
 */
1463 static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1465 struct ccp_rsa_engine *rsa = &cmd->u.rsa;
1466 struct ccp_dm_workarea exp, src;
1467 struct ccp_data dst;
1469 unsigned int ksb_count, i_len, o_len;
1472 if (rsa->key_size > CCP_RSA_MAX_WIDTH)
1475 if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
1478 /* The RSA modulus must precede the message being acted upon, so
1479 * it must be copied to a DMA area where the message and the
1480 * modulus can be concatenated. Therefore the input buffer
1481 * length required is twice the output buffer length (which
1482 * must be a multiple of 256-bits).
 * o_len below is key_size rounded up to whole 256-bit units, counted
 * at 32 bytes per unit; ksb_count is that length in CCP_KSB_BYTES
 * sized entries.
1484 o_len = ((rsa->key_size + 255) / 256) * 32;
1487 ksb_count = o_len / CCP_KSB_BYTES;
1489 memset(&op, 0, sizeof(op));
1491 op.jobid = ccp_gen_jobid(cmd_q->ccp);
1492 op.ksb_key = ccp_alloc_ksb(cmd_q->ccp, ksb_count);
1496 /* The RSA exponent may span multiple (32-byte) KSB entries and must
1497 * be in little endian format. Reverse copy each 32-byte chunk
1498 * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk)
1499 * and each byte within that chunk and do not perform any byte swap
1500 * operations on the passthru operation.
1502 ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
1506 ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len, CCP_KSB_BYTES,
1508 ret = ccp_copy_to_ksb(cmd_q, &exp, op.jobid, op.ksb_key,
1509 CCP_PASSTHRU_BYTESWAP_NOOP);
1511 cmd->engine_error = cmd_q->cmd_error;
1515 /* Concatenate the modulus and the message. Both the modulus and
1516 * the operands must be in little endian format. Since the input
1517 * is in big endian format it must be converted.
 * The modulus is written at the start of the workarea and the
 * message o_len bytes after it.
1519 ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE);
1523 ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len, CCP_KSB_BYTES,
1525 src.address += o_len; /* Adjust the address for the copy operation */
1526 ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len, CCP_KSB_BYTES,
1528 src.address -= o_len; /* Reset the address to original value */
1530 /* Prepare the output area for the operation */
1531 ret = ccp_init_data(&dst, cmd_q, rsa->dst, rsa->mod_len,
1532 o_len, DMA_FROM_DEVICE);
/* Describe the concatenated input workarea and the output workarea */
1537 op.src.u.dma.address = src.dma.address;
1538 op.src.u.dma.offset = 0;
1539 op.src.u.dma.length = i_len;
1540 op.dst.u.dma.address = dst.dm_wa.dma.address;
1541 op.dst.u.dma.offset = 0;
1542 op.dst.u.dma.length = o_len;
1544 op.u.rsa.mod_size = rsa->key_size;
1545 op.u.rsa.input_len = i_len;
1547 ret = ccp_perform_rsa(&op);
1549 cmd->engine_error = cmd_q->cmd_error;
/* Copy mod_len bytes of the result to the caller's buffer; the helper
 * presumably converts back from little to big endian - confirm
 */
1553 ccp_reverse_get_dm_area(&dst.dm_wa, rsa->dst, rsa->mod_len);
1556 ccp_free_data(&dst, cmd_q);
1565 ccp_free_ksb(cmd_q->ccp, op.ksb_key, ksb_count);
/* ccp_run_passthru_cmd - run one passthru request on a CCP command queue
 *
 * @cmd_q: command queue the operations are issued on; cmd_q->cmd_error is
 *         copied into cmd->engine_error after the KSB copy and after each
 *         passthru operation (presumably only on failure - the guarding
 *         tests are not part of this listing)
 * @cmd:   request; the passthru parameters are read from cmd->u.passthru
 *
 * When pt->bit_mod is not CCP_PASSTHRU_BITWISE_NOOP the mask is first copied
 * into the queue's KSB key entry.  The source scatterlist is then walked one
 * DMA-mapped entry at a time and every entry is submitted as its own
 * passthru operation, written into the current destination entry at the
 * running offset dst.sg_wa.sg_used.
 *
 * NOTE(review): several source lines are absent from this listing (return
 * statements, error labels, the in_place assignment and branch) - confirm
 * details against the full file.
 */
1570 static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
1571 struct ccp_cmd *cmd)
1573 struct ccp_passthru_engine *pt = &cmd->u.passthru;
1574 struct ccp_dm_workarea mask;
1575 struct ccp_data src, dst;
1577 bool in_place = false;
1581 if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
1584 if (!pt->src || !pt->dst)
1587 if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1588 if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
1594 BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1);
1596 memset(&op, 0, sizeof(op));
1598 op.jobid = ccp_gen_jobid(cmd_q->ccp);
1600 if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1602 op.ksb_key = cmd_q->ksb_key;
1604 ret = ccp_init_dm_workarea(&mask, cmd_q,
1605 CCP_PASSTHRU_KSB_COUNT *
1611 ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
1612 ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key,
1613 CCP_PASSTHRU_BYTESWAP_NOOP);
1615 cmd->engine_error = cmd_q->cmd_error;
1620 /* Prepare the input and output data workareas. For in-place
1621 * operations we need to set the dma direction to BIDIRECTIONAL
1622 * and copy the src workarea to the dst workarea.
1624 if (sg_virt(pt->src) == sg_virt(pt->dst))
1627 ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len,
1628 CCP_PASSTHRU_MASKSIZE,
1629 in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1636 ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
1637 CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
1642 /* Send data to the CCP Passthru engine
1643 * Because the CCP engine works on a single source and destination
1644 * dma address at a time, each entry in the source scatterlist
1645 * (after the dma_map_sg call) must be less than or equal to the
1646 * (remaining) length in the destination scatterlist entry and the
1647 * length must be a multiple of CCP_PASSTHRU_BLOCKSIZE
1649 dst.sg_wa.sg_used = 0;
1650 for (i = 1; i <= src.sg_wa.dma_count; i++) {
1651 if (!dst.sg_wa.sg ||
1652 (dst.sg_wa.sg->length < src.sg_wa.sg->length)) {
1657 if (i == src.sg_wa.dma_count) {
1662 op.src.type = CCP_MEMTYPE_SYSTEM;
1663 op.src.u.dma.address = sg_dma_address(src.sg_wa.sg);
1664 op.src.u.dma.offset = 0;
1665 op.src.u.dma.length = sg_dma_len(src.sg_wa.sg);
/* The running offset and the length belong to the destination
 * descriptor: successive source entries are placed one after another
 * inside the current destination entry, and the source descriptor set
 * up above must be left untouched.
 */
1667 op.dst.type = CCP_MEMTYPE_SYSTEM;
1668 op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg);
1669 op.dst.u.dma.offset = dst.sg_wa.sg_used;
1670 op.dst.u.dma.length = op.src.u.dma.length;
1672 ret = ccp_perform_passthru(&op);
1674 cmd->engine_error = cmd_q->cmd_error;
/* Advance within the destination entry; move to the next one once it
 * has been filled completely
 */
1678 dst.sg_wa.sg_used += src.sg_wa.sg->length;
1679 if (dst.sg_wa.sg_used == dst.sg_wa.sg->length) {
1680 dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
1681 dst.sg_wa.sg_used = 0;
1683 src.sg_wa.sg = sg_next(src.sg_wa.sg);
1688 ccp_free_data(&dst, cmd_q);
1691 ccp_free_data(&src, cmd_q);
1694 if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
/* ccp_run_ecc_mm_cmd - run one ECC modular math request on a CCP queue
 *
 * @cmd_q: command queue; cmd_q->ccp is used to generate the job id and
 *         cmd_q->cmd_error is copied into cmd->engine_error after the ECC
 *         operation (presumably only on failure - the guarding test is not
 *         part of this listing)
 * @cmd:   request; the ECC parameters are read from cmd->u.ecc
 *
 * Checks operand_1 (set, at most CCP_ECC_MODULUS_BYTES long), operand_2 in
 * the same way unless the function is CCP_ECC_FUNCTION_MINV_384BIT, and the
 * result buffer (set, at least CCP_ECC_MODULUS_BYTES long).  The modulus
 * and operands are then placed in consecutive CCP_ECC_OPERAND_SIZE slots of
 * a CCP_ECC_SRC_BUF_SIZE workarea, the engine is run, the 16-bit result
 * word is stored in ecc->ecc_result and CCP_ECC_MODULUS_BYTES of result are
 * copied to ecc->u.mm.result.
 *
 * NOTE(review): several source lines are absent from this listing (return
 * statements, error labels, the save/restore of src.address) - confirm
 * details against the full file.
 */
1700 static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1702 struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1703 struct ccp_dm_workarea src, dst;
1708 if (!ecc->u.mm.operand_1 ||
1709 (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
1712 if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)
1713 if (!ecc->u.mm.operand_2 ||
1714 (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
1717 if (!ecc->u.mm.result ||
1718 (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))
1721 memset(&op, 0, sizeof(op));
1723 op.jobid = ccp_gen_jobid(cmd_q->ccp);
1725 /* Concatenate the modulus and the operands. Both the modulus and
1726 * the operands must be in little endian format. Since the input
1727 * is in big endian format it must be converted and placed in a
1728 * fixed length buffer.
1730 ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
1735 /* Save the workarea address since it is updated in order to perform
1740 /* Copy the ECC modulus */
1741 ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
1742 CCP_ECC_OPERAND_SIZE, true);
1743 src.address += CCP_ECC_OPERAND_SIZE;
1745 /* Copy the first operand */
1746 ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_1,
1747 ecc->u.mm.operand_1_len,
1748 CCP_ECC_OPERAND_SIZE, true);
1749 src.address += CCP_ECC_OPERAND_SIZE;
1751 if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
1752 /* Copy the second operand */
1753 ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_2,
1754 ecc->u.mm.operand_2_len,
1755 CCP_ECC_OPERAND_SIZE, true);
1756 src.address += CCP_ECC_OPERAND_SIZE;
1759 /* Restore the workarea address */
1762 /* Prepare the output area for the operation */
1763 ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
/* Hand the complete source and destination workareas to the engine */
1769 op.src.u.dma.address = src.dma.address;
1770 op.src.u.dma.offset = 0;
1771 op.src.u.dma.length = src.length;
1772 op.dst.u.dma.address = dst.dma.address;
1773 op.dst.u.dma.offset = 0;
1774 op.dst.u.dma.length = dst.length;
1776 op.u.ecc.function = cmd->u.ecc.function;
1778 ret = ccp_perform_ecc(&op);
1780 cmd->engine_error = cmd_q->cmd_error;
/* The result word is read as a little-endian 16-bit value located
 * CCP_ECC_RESULT_OFFSET bytes into the destination workarea
 */
1784 ecc->ecc_result = le16_to_cpup(
1785 (const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
1786 if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
1791 /* Save the ECC result */
1792 ccp_reverse_get_dm_area(&dst, ecc->u.mm.result, CCP_ECC_MODULUS_BYTES);
/* ccp_run_ecc_pm_cmd - run one ECC point math request on a CCP queue
 *
 * @cmd_q: command queue; cmd_q->ccp is used to generate the job id and
 *         cmd_q->cmd_error is copied into cmd->engine_error after the ECC
 *         operation (presumably only on failure - the guarding test is not
 *         part of this listing)
 * @cmd:   request; the ECC parameters are read from cmd->u.ecc
 *
 * Checks point_1, then point_2 for CCP_ECC_FUNCTION_PADD_384BIT, domain_a,
 * the scalar for CCP_ECC_FUNCTION_PMUL_384BIT, and the result X/Y buffers
 * (each at least CCP_ECC_MODULUS_BYTES long).  The modulus and operands
 * are placed in consecutive CCP_ECC_OPERAND_SIZE slots of a
 * CCP_ECC_SRC_BUF_SIZE workarea, with a Z coordinate slot whose first byte
 * is set to 0x01 after each point.  After the engine has run, the 16-bit
 * result word is stored in ecc->ecc_result and the X and Y results are
 * copied from consecutive CCP_ECC_OUTPUT_SIZE slots of the destination.
 *
 * NOTE(review): several source lines are absent from this listing (return
 * statements, error labels, closing braces and any "else" branches, the
 * save/restore of the workarea addresses) - in particular, whether the
 * domain_a/scalar handling is an alternative to the second point cannot be
 * seen here; confirm against the full file.
 */
1803 static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1805 struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1806 struct ccp_dm_workarea src, dst;
1811 if (!ecc->u.pm.point_1.x ||
1812 (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
1813 !ecc->u.pm.point_1.y ||
1814 (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
1817 if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
1818 if (!ecc->u.pm.point_2.x ||
1819 (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
1820 !ecc->u.pm.point_2.y ||
1821 (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
1824 if (!ecc->u.pm.domain_a ||
1825 (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
1828 if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
1829 if (!ecc->u.pm.scalar ||
1830 (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
1834 if (!ecc->u.pm.result.x ||
1835 (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
1836 !ecc->u.pm.result.y ||
1837 (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
1840 memset(&op, 0, sizeof(op));
1842 op.jobid = ccp_gen_jobid(cmd_q->ccp);
1844 /* Concatenate the modulus and the operands. Both the modulus and
1845 * the operands must be in little endian format. Since the input
1846 * is in big endian format it must be converted and placed in a
1847 * fixed length buffer.
1849 ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
1854 /* Save the workarea address since it is updated in order to perform
1859 /* Copy the ECC modulus */
1860 ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
1861 CCP_ECC_OPERAND_SIZE, true);
1862 src.address += CCP_ECC_OPERAND_SIZE;
1864 /* Copy the first point X and Y coordinate */
1865 ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.x,
1866 ecc->u.pm.point_1.x_len,
1867 CCP_ECC_OPERAND_SIZE, true);
1868 src.address += CCP_ECC_OPERAND_SIZE;
1869 ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.y,
1870 ecc->u.pm.point_1.y_len,
1871 CCP_ECC_OPERAND_SIZE, true);
1872 src.address += CCP_ECC_OPERAND_SIZE;
1874 /* Set the first point Z coordinate to 1 */
1875 *(src.address) = 0x01;
1876 src.address += CCP_ECC_OPERAND_SIZE;
1878 if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
1879 /* Copy the second point X and Y coordinate */
1880 ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.x,
1881 ecc->u.pm.point_2.x_len,
1882 CCP_ECC_OPERAND_SIZE, true);
1883 src.address += CCP_ECC_OPERAND_SIZE;
1884 ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.y,
1885 ecc->u.pm.point_2.y_len,
1886 CCP_ECC_OPERAND_SIZE, true);
1887 src.address += CCP_ECC_OPERAND_SIZE;
1889 /* Set the second point Z coordinate to 1 */
1890 *(src.address) = 0x01;
1891 src.address += CCP_ECC_OPERAND_SIZE;
1893 /* Copy the Domain "a" parameter */
1894 ccp_reverse_set_dm_area(&src, ecc->u.pm.domain_a,
1895 ecc->u.pm.domain_a_len,
1896 CCP_ECC_OPERAND_SIZE, true);
1897 src.address += CCP_ECC_OPERAND_SIZE;
1899 if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
1900 /* Copy the scalar value */
1901 ccp_reverse_set_dm_area(&src, ecc->u.pm.scalar,
1902 ecc->u.pm.scalar_len,
1903 CCP_ECC_OPERAND_SIZE, true);
1904 src.address += CCP_ECC_OPERAND_SIZE;
1908 /* Restore the workarea address */
1911 /* Prepare the output area for the operation */
1912 ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
/* Hand the complete source and destination workareas to the engine */
1918 op.src.u.dma.address = src.dma.address;
1919 op.src.u.dma.offset = 0;
1920 op.src.u.dma.length = src.length;
1921 op.dst.u.dma.address = dst.dma.address;
1922 op.dst.u.dma.offset = 0;
1923 op.dst.u.dma.length = dst.length;
1925 op.u.ecc.function = cmd->u.ecc.function;
1927 ret = ccp_perform_ecc(&op);
1929 cmd->engine_error = cmd_q->cmd_error;
/* The result word is read as a little-endian 16-bit value located
 * CCP_ECC_RESULT_OFFSET bytes into the destination workarea
 */
1933 ecc->ecc_result = le16_to_cpup(
1934 (const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
1935 if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
1940 /* Save the workarea address since it is updated as we walk through
1941 * to copy the point math result
1945 /* Save the ECC result X and Y coordinates */
1946 ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.x,
1947 CCP_ECC_MODULUS_BYTES);
1948 dst.address += CCP_ECC_OUTPUT_SIZE;
1949 ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.y,
1950 CCP_ECC_MODULUS_BYTES);
1951 dst.address += CCP_ECC_OUTPUT_SIZE;
1953 /* Restore the workarea address */
/* ccp_run_ecc_cmd - dispatch an ECC request to the matching handler
 *
 * @cmd_q: command queue, passed through unchanged to the handler
 * @cmd:   request; the ECC parameters are read from cmd->u.ecc
 *
 * Clears ecc->ecc_result, checks the modulus length against
 * CCP_ECC_MODULUS_BYTES and then selects a handler by ecc->function:
 * MMUL/MADD/MINV go to ccp_run_ecc_mm_cmd(), PADD/PMUL/PDBL go to
 * ccp_run_ecc_pm_cmd(), whose return value is returned directly.
 *
 * NOTE(review): the first half of the modulus check, the statements taken
 * when it fails and the end of the switch are absent from this listing -
 * confirm against the full file.
 */
1965 static int ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1967 struct ccp_ecc_engine *ecc = &cmd->u.ecc;
/* Start every request with a cleared result word */
1969 ecc->ecc_result = 0;
1972 (ecc->mod_len > CCP_ECC_MODULUS_BYTES))
1975 switch (ecc->function) {
/* Modular math functions */
1976 case CCP_ECC_FUNCTION_MMUL_384BIT:
1977 case CCP_ECC_FUNCTION_MADD_384BIT:
1978 case CCP_ECC_FUNCTION_MINV_384BIT:
1979 return ccp_run_ecc_mm_cmd(cmd_q, cmd);
/* Point math functions */
1981 case CCP_ECC_FUNCTION_PADD_384BIT:
1982 case CCP_ECC_FUNCTION_PMUL_384BIT:
1983 case CCP_ECC_FUNCTION_PDBL_384BIT:
1984 return ccp_run_ecc_pm_cmd(cmd_q, cmd);
1991 int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1995 cmd->engine_error = 0;
1996 cmd_q->cmd_error = 0;
1997 cmd_q->int_rcvd = 0;
1998 cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
2000 switch (cmd->engine) {
2001 case CCP_ENGINE_AES:
2002 ret = ccp_run_aes_cmd(cmd_q, cmd);
2004 case CCP_ENGINE_XTS_AES_128:
2005 ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
2007 case CCP_ENGINE_SHA:
2008 ret = ccp_run_sha_cmd(cmd_q, cmd);
2010 case CCP_ENGINE_RSA:
2011 ret = ccp_run_rsa_cmd(cmd_q, cmd);
2013 case CCP_ENGINE_PASSTHRU:
2014 ret = ccp_run_passthru_cmd(cmd_q, cmd);
2016 case CCP_ENGINE_ECC:
2017 ret = ccp_run_ecc_cmd(cmd_q, cmd);