2 * AMD Cryptographic Coprocessor (CCP) driver
4 * Copyright (C) 2013 Advanced Micro Devices, Inc.
6 * Author: Tom Lendacky <thomas.lendacky@amd.com>
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
13 #include <linux/module.h>
14 #include <linux/kernel.h>
15 #include <linux/pci.h>
16 #include <linux/pci_ids.h>
17 #include <linux/kthread.h>
18 #include <linux/sched.h>
19 #include <linux/interrupt.h>
20 #include <linux/spinlock.h>
21 #include <linux/mutex.h>
22 #include <linux/delay.h>
23 #include <linux/ccp.h>
24 #include <linux/scatterlist.h>
25 #include <crypto/scatterwalk.h>
31 CCP_MEMTYPE_SYSTEM = 0,
41 enum dma_data_direction dir;
44 struct ccp_dm_workarea {
46 struct dma_pool *dma_pool;
50 struct ccp_dma_info dma;
53 struct ccp_sg_workarea {
54 struct scatterlist *sg;
58 struct scatterlist *dma_sg;
59 struct device *dma_dev;
60 unsigned int dma_count;
61 enum dma_data_direction dma_dir;
69 struct ccp_sg_workarea sg_wa;
70 struct ccp_dm_workarea dm_wa;
74 enum ccp_memtype type;
76 struct ccp_dma_info dma;
82 enum ccp_aes_type type;
83 enum ccp_aes_mode mode;
84 enum ccp_aes_action action;
87 struct ccp_xts_aes_op {
88 enum ccp_aes_action action;
89 enum ccp_xts_aes_unit_size unit_size;
93 enum ccp_sha_type type;
102 struct ccp_passthru_op {
103 enum ccp_passthru_bitwise bit_mod;
104 enum ccp_passthru_byteswap byte_swap;
108 enum ccp_ecc_function function;
112 struct ccp_cmd_queue *cmd_q;
126 struct ccp_aes_op aes;
127 struct ccp_xts_aes_op xts;
128 struct ccp_sha_op sha;
129 struct ccp_rsa_op rsa;
130 struct ccp_passthru_op passthru;
131 struct ccp_ecc_op ecc;
135 /* The CCP cannot perform zero-length sha operations so the caller
136 * is required to buffer data for the final operation. However, a
137 * sha operation for a message with a total length of zero is valid
138 * so known values are required to supply the result.
140 static const u8 ccp_sha1_zero[CCP_SHA_CTXSIZE] = {
141 0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d,
142 0x32, 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90,
143 0xaf, 0xd8, 0x07, 0x09, 0x00, 0x00, 0x00, 0x00,
144 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
147 static const u8 ccp_sha224_zero[CCP_SHA_CTXSIZE] = {
148 0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9,
149 0x47, 0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4,
150 0x15, 0xa2, 0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a,
151 0xc5, 0xb3, 0xe4, 0x2f, 0x00, 0x00, 0x00, 0x00,
154 static const u8 ccp_sha256_zero[CCP_SHA_CTXSIZE] = {
155 0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14,
156 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24,
157 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c,
158 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55,
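/*
 * These are the standard SHA-1, SHA-224 and SHA-256 digests of the empty
 * message, zero-padded out to CCP_SHA_CTXSIZE so they can be copied
 * directly into the caller's context buffer when a zero-length final
 * operation is requested.
 */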
161 static u32 ccp_addr_lo(struct ccp_dma_info *info)
163 return lower_32_bits(info->address + info->offset);
166 static u32 ccp_addr_hi(struct ccp_dma_info *info)
168 return upper_32_bits(info->address + info->offset) & 0x0000ffff;
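/*
 * Illustrative sketch (not part of the driver): how a DMA address is split
 * across the command request words. The device takes a 32-bit low word plus
 * only the lower 16 bits of the high word (48-bit addressing), which is why
 * ccp_addr_hi() masks with 0x0000ffff. The values below are made up.
 */
static void __maybe_unused ccp_addr_split_example(void)
{
	u64 address = 0x0000123456789abcULL;		/* hypothetical bus address */
	u32 lo = lower_32_bits(address);		/* 0x56789abc */
	u32 hi = upper_32_bits(address) & 0x0000ffff;	/* 0x1234 */

	(void)lo;
	(void)hi;
}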
171 static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
173 struct ccp_cmd_queue *cmd_q = op->cmd_q;
174 struct ccp_device *ccp = cmd_q->ccp;
175 void __iomem *cr_addr;
180 /* We could read a status register to see how many free slots
181 * are actually available, but reading that register resets it
182 * and you could lose some error information.
186 cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT)
187 | (op->jobid << REQ0_JOBID_SHIFT)
188 | REQ0_WAIT_FOR_WRITE;
191 cr0 |= REQ0_STOP_ON_COMPLETE
192 | REQ0_INT_ON_COMPLETE;
194 if (op->ioc || !cmd_q->free_slots)
195 cr0 |= REQ0_INT_ON_COMPLETE;
197 /* Start at CMD_REQ1 */
198 cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR;
200 mutex_lock(&ccp->req_mutex);
202 /* Write CMD_REQ1 through CMD_REQx first */
203 for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR)
204 iowrite32(*(cr + i), cr_addr);
206 /* Tell the CCP to start */
208 iowrite32(cr0, ccp->io_regs + CMD_REQ0);
210 mutex_unlock(&ccp->req_mutex);
212 if (cr0 & REQ0_INT_ON_COMPLETE) {
213 /* Wait for the job to complete */
214 ret = wait_event_interruptible(cmd_q->int_queue,
216 if (ret || cmd_q->cmd_error) {
217 /* On error delete all related jobs from the queue */
218 cmd = (cmd_q->id << DEL_Q_ID_SHIFT)
221 iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
225 } else if (op->soc) {
226 /* Delete just head job from the queue on SoC */
228 | (cmd_q->id << DEL_Q_ID_SHIFT)
231 iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
234 cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status);
242 static int ccp_perform_aes(struct ccp_op *op)
246 /* Fill out the register contents for REQ1 through REQ6 */
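/* Word layout shared by the ccp_perform_*() helpers, as built up below:
 * cr[0] selects the engine and carries the engine-specific control bits,
 * cr[1] is the source length minus one, cr[2]/cr[3] describe the source
 * (address low word, then memory type plus the upper address bits) and
 * cr[4]/cr[5] describe the destination in the same form.
 */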
247 cr[0] = (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT)
248 | (op->u.aes.type << REQ1_AES_TYPE_SHIFT)
249 | (op->u.aes.mode << REQ1_AES_MODE_SHIFT)
250 | (op->u.aes.action << REQ1_AES_ACTION_SHIFT)
251 | (op->ksb_key << REQ1_KEY_KSB_SHIFT);
252 cr[1] = op->src.u.dma.length - 1;
253 cr[2] = ccp_addr_lo(&op->src.u.dma);
254 cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
255 | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
256 | ccp_addr_hi(&op->src.u.dma);
257 cr[4] = ccp_addr_lo(&op->dst.u.dma);
258 cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
259 | ccp_addr_hi(&op->dst.u.dma);
261 if (op->u.aes.mode == CCP_AES_MODE_CFB)
262 cr[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT);
270 return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
273 static int ccp_perform_xts_aes(struct ccp_op *op)
277 /* Fill out the register contents for REQ1 through REQ6 */
278 cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT)
279 | (op->u.xts.action << REQ1_AES_ACTION_SHIFT)
280 | (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT)
281 | (op->ksb_key << REQ1_KEY_KSB_SHIFT);
282 cr[1] = op->src.u.dma.length - 1;
283 cr[2] = ccp_addr_lo(&op->src.u.dma);
284 cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
285 | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
286 | ccp_addr_hi(&op->src.u.dma);
287 cr[4] = ccp_addr_lo(&op->dst.u.dma);
288 cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
289 | ccp_addr_hi(&op->dst.u.dma);
297 return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
300 static int ccp_perform_sha(struct ccp_op *op)
304 /* Fill out the register contents for REQ1 through REQ6 */
305 cr[0] = (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT)
306 | (op->u.sha.type << REQ1_SHA_TYPE_SHIFT)
308 cr[1] = op->src.u.dma.length - 1;
309 cr[2] = ccp_addr_lo(&op->src.u.dma);
310 cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
311 | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
312 | ccp_addr_hi(&op->src.u.dma);
316 cr[4] = lower_32_bits(op->u.sha.msg_bits);
317 cr[5] = upper_32_bits(op->u.sha.msg_bits);
323 return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
326 static int ccp_perform_rsa(struct ccp_op *op)
330 /* Fill out the register contents for REQ1 through REQ6 */
331 cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT)
332 | (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT)
333 | (op->ksb_key << REQ1_KEY_KSB_SHIFT)
335 cr[1] = op->u.rsa.input_len - 1;
336 cr[2] = ccp_addr_lo(&op->src.u.dma);
337 cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
338 | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
339 | ccp_addr_hi(&op->src.u.dma);
340 cr[4] = ccp_addr_lo(&op->dst.u.dma);
341 cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
342 | ccp_addr_hi(&op->dst.u.dma);
344 return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
347 static int ccp_perform_passthru(struct ccp_op *op)
351 /* Fill out the register contents for REQ1 through REQ6 */
352 cr[0] = (CCP_ENGINE_PASSTHRU << REQ1_ENGINE_SHIFT)
353 | (op->u.passthru.bit_mod << REQ1_PT_BW_SHIFT)
354 | (op->u.passthru.byte_swap << REQ1_PT_BS_SHIFT);
356 if (op->src.type == CCP_MEMTYPE_SYSTEM)
357 cr[1] = op->src.u.dma.length - 1;
359 cr[1] = op->dst.u.dma.length - 1;
361 if (op->src.type == CCP_MEMTYPE_SYSTEM) {
362 cr[2] = ccp_addr_lo(&op->src.u.dma);
363 cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
364 | ccp_addr_hi(&op->src.u.dma);
366 if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
367 cr[3] |= (op->ksb_key << REQ4_KSB_SHIFT);
369 cr[2] = op->src.u.ksb * CCP_KSB_BYTES;
370 cr[3] = (CCP_MEMTYPE_KSB << REQ4_MEMTYPE_SHIFT);
373 if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
374 cr[4] = ccp_addr_lo(&op->dst.u.dma);
375 cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
376 | ccp_addr_hi(&op->dst.u.dma);
378 cr[4] = op->dst.u.ksb * CCP_KSB_BYTES;
379 cr[5] = (CCP_MEMTYPE_KSB << REQ6_MEMTYPE_SHIFT);
385 return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
388 static int ccp_perform_ecc(struct ccp_op *op)
392 /* Fill out the register contents for REQ1 through REQ6 */
393 cr[0] = REQ1_ECC_AFFINE_CONVERT
394 | (CCP_ENGINE_ECC << REQ1_ENGINE_SHIFT)
395 | (op->u.ecc.function << REQ1_ECC_FUNCTION_SHIFT)
397 cr[1] = op->src.u.dma.length - 1;
398 cr[2] = ccp_addr_lo(&op->src.u.dma);
399 cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
400 | ccp_addr_hi(&op->src.u.dma);
401 cr[4] = ccp_addr_lo(&op->dst.u.dma);
402 cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
403 | ccp_addr_hi(&op->dst.u.dma);
405 return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
408 static u32 ccp_alloc_ksb(struct ccp_device *ccp, unsigned int count)
413 mutex_lock(&ccp->ksb_mutex);
415 start = (u32)bitmap_find_next_zero_area(ccp->ksb,
419 if (start <= ccp->ksb_count) {
420 bitmap_set(ccp->ksb, start, count);
422 mutex_unlock(&ccp->ksb_mutex);
428 mutex_unlock(&ccp->ksb_mutex);
430 /* Wait for KSB entries to become available */
431 if (wait_event_interruptible(ccp->ksb_queue, ccp->ksb_avail))
435 return KSB_START + start;
438 static void ccp_free_ksb(struct ccp_device *ccp, unsigned int start,
444 mutex_lock(&ccp->ksb_mutex);
446 bitmap_clear(ccp->ksb, start - KSB_START, count);
450 mutex_unlock(&ccp->ksb_mutex);
452 wake_up_interruptible_all(&ccp->ksb_queue);
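/*
 * Illustrative sketch (not part of the driver, assumes <linux/bitmap.h> is
 * available): the KSB allocator above is a plain bitmap allocator. A
 * reduced, self-contained version of the same find/set/clear pattern, using
 * a made-up 32-entry storage block, looks like this:
 */
static int __maybe_unused ksb_bitmap_example(void)
{
	DECLARE_BITMAP(map, 32);
	unsigned long start;

	bitmap_zero(map, 32);

	/* Find and claim two contiguous free entries */
	start = bitmap_find_next_zero_area(map, 32, 0, 2, 0);
	if (start >= 32)
		return -ENOSPC;
	bitmap_set(map, start, 2);

	/* ... use the entries, then release them ... */
	bitmap_clear(map, start, 2);

	return 0;
}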
455 static u32 ccp_gen_jobid(struct ccp_device *ccp)
457 return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK;
460 static void ccp_sg_free(struct ccp_sg_workarea *wa)
463 dma_unmap_sg(wa->dma_dev, wa->dma_sg, wa->nents, wa->dma_dir);
468 static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
469 struct scatterlist *sg, unsigned int len,
470 enum dma_data_direction dma_dir)
472 memset(wa, 0, sizeof(*wa));
478 wa->nents = sg_nents(sg);
479 wa->length = sg->length;
480 wa->bytes_left = len;
486 if (dma_dir == DMA_NONE)
491 wa->dma_dir = dma_dir;
492 wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
500 static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
502 unsigned int nbytes = min(len, wa->bytes_left);
507 wa->sg_used += nbytes;
508 wa->bytes_left -= nbytes;
509 if (wa->sg_used == wa->sg->length) {
510 wa->sg = sg_next(wa->sg);
515 static void ccp_dm_free(struct ccp_dm_workarea *wa)
517 if (wa->length <= CCP_DMAPOOL_MAX_SIZE) {
519 dma_pool_free(wa->dma_pool, wa->address,
523 dma_unmap_single(wa->dev, wa->dma.address, wa->length,
532 static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
533 struct ccp_cmd_queue *cmd_q,
535 enum dma_data_direction dir)
537 memset(wa, 0, sizeof(*wa));
542 wa->dev = cmd_q->ccp->dev;
545 if (len <= CCP_DMAPOOL_MAX_SIZE) {
546 wa->dma_pool = cmd_q->dma_pool;
548 wa->address = dma_pool_alloc(wa->dma_pool, GFP_KERNEL,
553 wa->dma.length = CCP_DMAPOOL_MAX_SIZE;
555 memset(wa->address, 0, CCP_DMAPOOL_MAX_SIZE);
557 wa->address = kzalloc(len, GFP_KERNEL);
561 wa->dma.address = dma_map_single(wa->dev, wa->address, len,
563 if (!wa->dma.address)
566 wa->dma.length = len;
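/* Work areas no larger than CCP_DMAPOOL_MAX_SIZE are carved out of the
 * per-queue DMA pool; anything bigger is kzalloc'd and mapped with
 * dma_map_single(). ccp_dm_free() above undoes whichever variant was used.
 */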
573 static void ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
574 struct scatterlist *sg, unsigned int sg_offset,
577 WARN_ON(!wa->address);
579 scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
583 static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
584 struct scatterlist *sg, unsigned int sg_offset,
587 WARN_ON(!wa->address);
589 scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
593 static void ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
594 struct scatterlist *sg,
595 unsigned int len, unsigned int se_len,
598 unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
599 u8 buffer[CCP_REVERSE_BUF_SIZE];
601 BUG_ON(se_len > sizeof(buffer));
607 ksb_len = min_t(unsigned int, nbytes, se_len);
608 sg_offset -= ksb_len;
610 scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 0);
611 for (i = 0; i < ksb_len; i++)
612 wa->address[dm_offset + i] = buffer[ksb_len - i - 1];
614 dm_offset += ksb_len;
617 if ((ksb_len != se_len) && sign_extend) {
618 /* Must sign-extend to nearest sign-extend length */
619 if (wa->address[dm_offset - 1] & 0x80)
620 memset(wa->address + dm_offset, 0xff,
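/* Worked example: with an se_len of 32 bytes, a 3-byte big-endian operand
 * 0x12 0x34 0x56 in the scatterlist lands in the workarea as 0x56 0x34 0x12
 * followed by zero padding (the workarea is pre-zeroed), or by 0xff padding
 * when sign extension is requested and the most significant byte has its
 * top bit set.
 */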
626 static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
627 struct scatterlist *sg,
630 unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
631 u8 buffer[CCP_REVERSE_BUF_SIZE];
637 ksb_len = min_t(unsigned int, nbytes, sizeof(buffer));
638 dm_offset -= ksb_len;
640 for (i = 0; i < ksb_len; i++)
641 buffer[ksb_len - i - 1] = wa->address[dm_offset + i];
642 scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 1);
644 sg_offset += ksb_len;
649 static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q)
651 ccp_dm_free(&data->dm_wa);
652 ccp_sg_free(&data->sg_wa);
655 static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q,
656 struct scatterlist *sg, unsigned int sg_len,
658 enum dma_data_direction dir)
662 memset(data, 0, sizeof(*data));
664 ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len,
669 ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir);
676 ccp_free_data(data, cmd_q);
681 static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
683 struct ccp_sg_workarea *sg_wa = &data->sg_wa;
684 struct ccp_dm_workarea *dm_wa = &data->dm_wa;
685 unsigned int buf_count, nbytes;
687 /* Clear the buffer if setting it */
689 memset(dm_wa->address, 0, dm_wa->length);
694 /* Perform the copy operation */
695 nbytes = min(sg_wa->bytes_left, dm_wa->length);
696 scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used,
699 /* Update the structures and generate the count */
701 while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
702 nbytes = min3(sg_wa->sg->length - sg_wa->sg_used,
703 dm_wa->length - buf_count,
707 ccp_update_sg_workarea(sg_wa, nbytes);
713 static unsigned int ccp_fill_queue_buf(struct ccp_data *data)
715 return ccp_queue_buf(data, 0);
718 static unsigned int ccp_empty_queue_buf(struct ccp_data *data)
720 return ccp_queue_buf(data, 1);
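/* ccp_fill_queue_buf() (from = 0) zeroes the bounce buffer and gathers
 * source bytes from the scatterlist into it ahead of an operation, while
 * ccp_empty_queue_buf() (from = 1) copies the buffered result back out to
 * the destination scatterlist (see ccp_process_data() below).
 */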
723 static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
724 struct ccp_op *op, unsigned int block_size,
727 unsigned int sg_src_len, sg_dst_len, op_len;
729 /* The CCP can only DMA from/to one address each per operation. This
730 * requires that we find the smallest DMA area between the source
733 sg_src_len = min(sg_dma_len(src->sg_wa.sg) - src->sg_wa.sg_used,
734 src->sg_wa.bytes_left);
737 sg_dst_len = min(sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used,
738 src->sg_wa.bytes_left);
739 op_len = min(sg_src_len, sg_dst_len);
743 /* The data operation length will be at least block_size in length
744 * or the smaller of available sg room remaining for the source or
747 op_len = max(op_len, block_size);
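/* Worked example: with a 16-byte block size, 60 bytes left in the current
 * source sg element and 100 in the destination, op_len is 60; the source
 * is used directly and the DMA length is rounded down to 48, leaving the
 * remaining 12 bytes for the next pass. If fewer than 16 bytes remain in
 * the sg element, they are instead copied into the bounce buffer (together
 * with bytes from the following elements) and the buffer becomes the DMA
 * source.
 */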
749 /* Unless we have to buffer data, there's no reason to wait */
752 if (sg_src_len < block_size) {
753 /* Not enough data in the sg element, so it
754 * needs to be buffered into a blocksize chunk
756 int cp_len = ccp_fill_queue_buf(src);
759 op->src.u.dma.address = src->dm_wa.dma.address;
760 op->src.u.dma.offset = 0;
761 op->src.u.dma.length = (blocksize_op) ? block_size : cp_len;
763 /* Enough data in the sg element, but we need to
764 * adjust for any previously copied data
766 op->src.u.dma.address = sg_dma_address(src->sg_wa.sg);
767 op->src.u.dma.offset = src->sg_wa.sg_used;
768 op->src.u.dma.length = op_len & ~(block_size - 1);
770 ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length);
774 if (sg_dst_len < block_size) {
775 /* Not enough room in the sg element or we're on the
776 * last piece of data (when using padding), so the
777 * output needs to be buffered into a blocksize chunk
780 op->dst.u.dma.address = dst->dm_wa.dma.address;
781 op->dst.u.dma.offset = 0;
782 op->dst.u.dma.length = op->src.u.dma.length;
784 /* Enough room in the sg element, but we need to
785 * adjust for any previously used area
787 op->dst.u.dma.address = sg_dma_address(dst->sg_wa.sg);
788 op->dst.u.dma.offset = dst->sg_wa.sg_used;
789 op->dst.u.dma.length = op->src.u.dma.length;
794 static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
800 if (op->dst.u.dma.address == dst->dm_wa.dma.address)
801 ccp_empty_queue_buf(dst);
803 ccp_update_sg_workarea(&dst->sg_wa,
804 op->dst.u.dma.length);
808 static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q,
809 struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
810 u32 byte_swap, bool from)
814 memset(&op, 0, sizeof(op));
822 op.src.type = CCP_MEMTYPE_KSB;
824 op.dst.type = CCP_MEMTYPE_SYSTEM;
825 op.dst.u.dma.address = wa->dma.address;
826 op.dst.u.dma.length = wa->length;
828 op.src.type = CCP_MEMTYPE_SYSTEM;
829 op.src.u.dma.address = wa->dma.address;
830 op.src.u.dma.length = wa->length;
831 op.dst.type = CCP_MEMTYPE_KSB;
835 op.u.passthru.byte_swap = byte_swap;
837 return ccp_perform_passthru(&op);
840 static int ccp_copy_to_ksb(struct ccp_cmd_queue *cmd_q,
841 struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
844 return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, false);
847 static int ccp_copy_from_ksb(struct ccp_cmd_queue *cmd_q,
848 struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
851 return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, true);
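/* ccp_copy_to_ksb() and ccp_copy_from_ksb() wrap a pass-through operation
 * in which one side is a key storage block (KSB) entry and the other is the
 * caller's DMA-mapped workarea; the byte_swap argument lets callers convert
 * between big- and little-endian representations on the way through.
 */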
854 static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
857 struct ccp_aes_engine *aes = &cmd->u.aes;
858 struct ccp_dm_workarea key, ctx;
861 unsigned int dm_offset;
864 if (!((aes->key_len == AES_KEYSIZE_128) ||
865 (aes->key_len == AES_KEYSIZE_192) ||
866 (aes->key_len == AES_KEYSIZE_256)))
869 if (aes->src_len & (AES_BLOCK_SIZE - 1))
872 if (aes->iv_len != AES_BLOCK_SIZE)
875 if (!aes->key || !aes->iv || !aes->src)
878 if (aes->cmac_final) {
879 if (aes->cmac_key_len != AES_BLOCK_SIZE)
886 BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
887 BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
890 memset(&op, 0, sizeof(op));
892 op.jobid = ccp_gen_jobid(cmd_q->ccp);
893 op.ksb_key = cmd_q->ksb_key;
894 op.ksb_ctx = cmd_q->ksb_ctx;
896 op.u.aes.type = aes->type;
897 op.u.aes.mode = aes->mode;
898 op.u.aes.action = aes->action;
900 /* All supported key sizes fit in a single (32-byte) KSB entry
901 * and must be in little endian format. Use the 256-bit byte
902 * swap passthru option to convert from big endian to little
905 ret = ccp_init_dm_workarea(&key, cmd_q,
906 CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
911 dm_offset = CCP_KSB_BYTES - aes->key_len;
912 ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
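/* Worked example: a 128-bit key gives dm_offset = 32 - 16 = 16, so the key
 * occupies the upper half of the 32-byte KSB image; the 256-bit byteswap
 * pass-through that follows reverses the whole entry, leaving the key in
 * little endian order as the hardware expects.
 */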
913 ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
914 CCP_PASSTHRU_BYTESWAP_256BIT);
916 cmd->engine_error = cmd_q->cmd_error;
920 /* The AES context fits in a single (32-byte) KSB entry and
921 * must be in little endian format. Use the 256-bit byte swap
922 * passthru option to convert from big endian to little endian.
924 ret = ccp_init_dm_workarea(&ctx, cmd_q,
925 CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
930 dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
931 ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
932 ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
933 CCP_PASSTHRU_BYTESWAP_256BIT);
935 cmd->engine_error = cmd_q->cmd_error;
939 /* Send data to the CCP AES engine */
940 ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
941 AES_BLOCK_SIZE, DMA_TO_DEVICE);
945 while (src.sg_wa.bytes_left) {
946 ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true);
947 if (aes->cmac_final && !src.sg_wa.bytes_left) {
950 /* Push the K1/K2 key to the CCP now */
951 ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid,
953 CCP_PASSTHRU_BYTESWAP_256BIT);
955 cmd->engine_error = cmd_q->cmd_error;
959 ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
961 ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
962 CCP_PASSTHRU_BYTESWAP_256BIT);
964 cmd->engine_error = cmd_q->cmd_error;
969 ret = ccp_perform_aes(&op);
971 cmd->engine_error = cmd_q->cmd_error;
975 ccp_process_data(&src, NULL, &op);
978 /* Retrieve the AES context - convert from LE to BE using
979 * 32-byte (256-bit) byteswapping
981 ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
982 CCP_PASSTHRU_BYTESWAP_256BIT);
984 cmd->engine_error = cmd_q->cmd_error;
988 /* ...but we only need AES_BLOCK_SIZE bytes */
989 dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
990 ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
993 ccp_free_data(&src, cmd_q);
1004 static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1006 struct ccp_aes_engine *aes = &cmd->u.aes;
1007 struct ccp_dm_workarea key, ctx;
1008 struct ccp_data src, dst;
1010 unsigned int dm_offset;
1011 bool in_place = false;
1014 if (aes->mode == CCP_AES_MODE_CMAC)
1015 return ccp_run_aes_cmac_cmd(cmd_q, cmd);
1017 if (!((aes->key_len == AES_KEYSIZE_128) ||
1018 (aes->key_len == AES_KEYSIZE_192) ||
1019 (aes->key_len == AES_KEYSIZE_256)))
1022 if (((aes->mode == CCP_AES_MODE_ECB) ||
1023 (aes->mode == CCP_AES_MODE_CBC) ||
1024 (aes->mode == CCP_AES_MODE_CFB)) &&
1025 (aes->src_len & (AES_BLOCK_SIZE - 1)))
1028 if (!aes->key || !aes->src || !aes->dst)
1031 if (aes->mode != CCP_AES_MODE_ECB) {
1032 if (aes->iv_len != AES_BLOCK_SIZE)
1039 BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
1040 BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
1043 memset(&op, 0, sizeof(op));
1045 op.jobid = ccp_gen_jobid(cmd_q->ccp);
1046 op.ksb_key = cmd_q->ksb_key;
1047 op.ksb_ctx = cmd_q->ksb_ctx;
1048 op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
1049 op.u.aes.type = aes->type;
1050 op.u.aes.mode = aes->mode;
1051 op.u.aes.action = aes->action;
1053 /* All supported key sizes fit in a single (32-byte) KSB entry
1054 * and must be in little endian format. Use the 256-bit byte
1055 * swap passthru option to convert from big endian to little
1058 ret = ccp_init_dm_workarea(&key, cmd_q,
1059 CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
1064 dm_offset = CCP_KSB_BYTES - aes->key_len;
1065 ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
1066 ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
1067 CCP_PASSTHRU_BYTESWAP_256BIT);
1069 cmd->engine_error = cmd_q->cmd_error;
1073 /* The AES context fits in a single (32-byte) KSB entry and
1074 * must be in little endian format. Use the 256-bit byte swap
1075 * passthru option to convert from big endian to little endian.
1077 ret = ccp_init_dm_workarea(&ctx, cmd_q,
1078 CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
1083 if (aes->mode != CCP_AES_MODE_ECB) {
1084 /* Load the AES context - convert to LE */
1085 dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1086 ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1087 ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1088 CCP_PASSTHRU_BYTESWAP_256BIT);
1090 cmd->engine_error = cmd_q->cmd_error;
1095 /* Prepare the input and output data workareas. For in-place
1096 * operations we need to set the dma direction to BIDIRECTIONAL
1097 * and copy the src workarea to the dst workarea.
1099 if (sg_virt(aes->src) == sg_virt(aes->dst))
1102 ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
1104 in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1111 ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
1112 AES_BLOCK_SIZE, DMA_FROM_DEVICE);
1117 /* Send data to the CCP AES engine */
1118 while (src.sg_wa.bytes_left) {
1119 ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
1120 if (!src.sg_wa.bytes_left) {
1123 /* Since we don't retrieve the AES context in ECB
1124 * mode we have to wait for the operation to complete
1125 * on the last piece of data
1127 if (aes->mode == CCP_AES_MODE_ECB)
1131 ret = ccp_perform_aes(&op);
1133 cmd->engine_error = cmd_q->cmd_error;
1137 ccp_process_data(&src, &dst, &op);
1140 if (aes->mode != CCP_AES_MODE_ECB) {
1141 /* Retrieve the AES context - convert from LE to BE using
1142 * 32-byte (256-bit) byteswapping
1144 ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1145 CCP_PASSTHRU_BYTESWAP_256BIT);
1147 cmd->engine_error = cmd_q->cmd_error;
1151 /* ...but we only need AES_BLOCK_SIZE bytes */
1152 dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1153 ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1158 ccp_free_data(&dst, cmd_q);
1161 ccp_free_data(&src, cmd_q);
1172 static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
1173 struct ccp_cmd *cmd)
1175 struct ccp_xts_aes_engine *xts = &cmd->u.xts;
1176 struct ccp_dm_workarea key, ctx;
1177 struct ccp_data src, dst;
1179 unsigned int unit_size, dm_offset;
1180 bool in_place = false;
1183 switch (xts->unit_size) {
1184 case CCP_XTS_AES_UNIT_SIZE_16:
1187 case CCP_XTS_AES_UNIT_SIZE_512:
1190 case CCP_XTS_AES_UNIT_SIZE_1024:
1193 case CCP_XTS_AES_UNIT_SIZE_2048:
1196 case CCP_XTS_AES_UNIT_SIZE_4096:
1204 if (xts->key_len != AES_KEYSIZE_128)
1207 if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
1210 if (xts->iv_len != AES_BLOCK_SIZE)
1213 if (!xts->key || !xts->iv || !xts->src || !xts->dst)
1216 BUILD_BUG_ON(CCP_XTS_AES_KEY_KSB_COUNT != 1);
1217 BUILD_BUG_ON(CCP_XTS_AES_CTX_KSB_COUNT != 1);
1220 memset(&op, 0, sizeof(op));
1222 op.jobid = ccp_gen_jobid(cmd_q->ccp);
1223 op.ksb_key = cmd_q->ksb_key;
1224 op.ksb_ctx = cmd_q->ksb_ctx;
1226 op.u.xts.action = xts->action;
1227 op.u.xts.unit_size = xts->unit_size;
1229 /* All supported key sizes fit in a single (32-byte) KSB entry
1230 * and must be in little endian format. Use the 256-bit byte
1231 * swap passthru option to convert from big endian to little
1234 ret = ccp_init_dm_workarea(&key, cmd_q,
1235 CCP_XTS_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
1240 dm_offset = CCP_KSB_BYTES - AES_KEYSIZE_128;
1241 ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
1242 ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len);
1243 ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
1244 CCP_PASSTHRU_BYTESWAP_256BIT);
1246 cmd->engine_error = cmd_q->cmd_error;
1250 /* The AES context fits in a single (32-byte) KSB entry and
1251 * for XTS is already in little endian format so no byte swapping
1254 ret = ccp_init_dm_workarea(&ctx, cmd_q,
1255 CCP_XTS_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
1260 ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
1261 ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1262 CCP_PASSTHRU_BYTESWAP_NOOP);
1264 cmd->engine_error = cmd_q->cmd_error;
1268 /* Prepare the input and output data workareas. For in-place
1269 * operations we need to set the dma direction to BIDIRECTIONAL
1270 * and copy the src workarea to the dst workarea.
1272 if (sg_virt(xts->src) == sg_virt(xts->dst))
1275 ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len,
1277 in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1284 ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
1285 unit_size, DMA_FROM_DEVICE);
1290 /* Send data to the CCP AES engine */
1291 while (src.sg_wa.bytes_left) {
1292 ccp_prepare_data(&src, &dst, &op, unit_size, true);
1293 if (!src.sg_wa.bytes_left)
1296 ret = ccp_perform_xts_aes(&op);
1298 cmd->engine_error = cmd_q->cmd_error;
1302 ccp_process_data(&src, &dst, &op);
1305 /* Retrieve the AES context - convert from LE to BE using
1306 * 32-byte (256-bit) byteswapping
1308 ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1309 CCP_PASSTHRU_BYTESWAP_256BIT);
1311 cmd->engine_error = cmd_q->cmd_error;
1315 /* ...but we only need AES_BLOCK_SIZE bytes */
1316 dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1317 ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);
1321 ccp_free_data(&dst, cmd_q);
1324 ccp_free_data(&src, cmd_q);
1335 static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1337 struct ccp_sha_engine *sha = &cmd->u.sha;
1338 struct ccp_dm_workarea ctx;
1339 struct ccp_data src;
1343 if (sha->ctx_len != CCP_SHA_CTXSIZE)
1349 if (!sha->final && (sha->src_len & (CCP_SHA_BLOCKSIZE - 1)))
1352 if (!sha->src_len) {
1355 /* Not final, just return */
1359 /* CCP can't do a zero length sha operation so the caller
1360 * must buffer the data.
1365 /* For a sha operation on a message with a total length of zero,
1366 * return the known result.
1368 switch (sha->type) {
1369 case CCP_SHA_TYPE_1:
1370 sha_zero = ccp_sha1_zero;
1372 case CCP_SHA_TYPE_224:
1373 sha_zero = ccp_sha224_zero;
1375 case CCP_SHA_TYPE_256:
1376 sha_zero = ccp_sha256_zero;
1382 scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
1391 BUILD_BUG_ON(CCP_SHA_KSB_COUNT != 1);
1393 memset(&op, 0, sizeof(op));
1395 op.jobid = ccp_gen_jobid(cmd_q->ccp);
1396 op.ksb_ctx = cmd_q->ksb_ctx;
1397 op.u.sha.type = sha->type;
1398 op.u.sha.msg_bits = sha->msg_bits;
1400 /* The SHA context fits in a single (32-byte) KSB entry and
1401 * must be in little endian format. Use the 256-bit byte swap
1402 * passthru option to convert from big endian to little endian.
1404 ret = ccp_init_dm_workarea(&ctx, cmd_q,
1405 CCP_SHA_KSB_COUNT * CCP_KSB_BYTES,
1410 ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
1411 ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1412 CCP_PASSTHRU_BYTESWAP_256BIT);
1414 cmd->engine_error = cmd_q->cmd_error;
1418 /* Send data to the CCP SHA engine */
1419 ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
1420 CCP_SHA_BLOCKSIZE, DMA_TO_DEVICE);
1424 while (src.sg_wa.bytes_left) {
1425 ccp_prepare_data(&src, NULL, &op, CCP_SHA_BLOCKSIZE, false);
1426 if (sha->final && !src.sg_wa.bytes_left)
1429 ret = ccp_perform_sha(&op);
1431 cmd->engine_error = cmd_q->cmd_error;
1435 ccp_process_data(&src, NULL, &op);
1438 /* Retrieve the SHA context - convert from LE to BE using
1439 * 32-byte (256-bit) byteswapping
1441 ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1442 CCP_PASSTHRU_BYTESWAP_256BIT);
1444 cmd->engine_error = cmd_q->cmd_error;
1448 ccp_get_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
1451 ccp_free_data(&src, cmd_q);
1459 static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1461 struct ccp_rsa_engine *rsa = &cmd->u.rsa;
1462 struct ccp_dm_workarea exp, src;
1463 struct ccp_data dst;
1465 unsigned int ksb_count, i_len, o_len;
1468 if (rsa->key_size > CCP_RSA_MAX_WIDTH)
1471 if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
1474 /* The RSA modulus must precede the message being acted upon, so
1475 * it must be copied to a DMA area where the message and the
1476 * modulus can be concatenated. Therefore the input buffer
1477 * length required is twice the output buffer length (which
1478 * must be a multiple of 256 bits).
1480 o_len = ((rsa->key_size + 255) / 256) * 32;
1483 ksb_count = o_len / CCP_KSB_BYTES;
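/* Worked example: a 2048-bit key gives o_len = ((2048 + 255) / 256) * 32 =
 * 256 bytes, an input area of twice that (512 bytes) for the concatenated
 * modulus and message, and ksb_count = 256 / 32 = 8 KSB entries for the
 * exponent.
 */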
1485 memset(&op, 0, sizeof(op));
1487 op.jobid = ccp_gen_jobid(cmd_q->ccp);
1488 op.ksb_key = ccp_alloc_ksb(cmd_q->ccp, ksb_count);
1492 /* The RSA exponent may span multiple (32-byte) KSB entries and must
1493 * be in little endian format. Reverse copy each 32-byte chunk
1494 * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk)
1495 * and each byte within that chunk and do not perform any byte swap
1496 * operations on the passthru operation.
1498 ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
1502 ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len, CCP_KSB_BYTES,
1504 ret = ccp_copy_to_ksb(cmd_q, &exp, op.jobid, op.ksb_key,
1505 CCP_PASSTHRU_BYTESWAP_NOOP);
1507 cmd->engine_error = cmd_q->cmd_error;
1511 /* Concatenate the modulus and the message. Both the modulus and
1512 * the operands must be in little endian format. Since the input
1513 * is in big endian format it must be converted.
1515 ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE);
1519 ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len, CCP_KSB_BYTES,
1521 src.address += o_len; /* Adjust the address for the copy operation */
1522 ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len, CCP_KSB_BYTES,
1524 src.address -= o_len; /* Reset the address to original value */
1526 /* Prepare the output area for the operation */
1527 ret = ccp_init_data(&dst, cmd_q, rsa->dst, rsa->mod_len,
1528 o_len, DMA_FROM_DEVICE);
1533 op.src.u.dma.address = src.dma.address;
1534 op.src.u.dma.offset = 0;
1535 op.src.u.dma.length = i_len;
1536 op.dst.u.dma.address = dst.dm_wa.dma.address;
1537 op.dst.u.dma.offset = 0;
1538 op.dst.u.dma.length = o_len;
1540 op.u.rsa.mod_size = rsa->key_size;
1541 op.u.rsa.input_len = i_len;
1543 ret = ccp_perform_rsa(&op);
1545 cmd->engine_error = cmd_q->cmd_error;
1549 ccp_reverse_get_dm_area(&dst.dm_wa, rsa->dst, rsa->mod_len);
1552 ccp_free_data(&dst, cmd_q);
1561 ccp_free_ksb(cmd_q->ccp, op.ksb_key, ksb_count);
1566 static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
1567 struct ccp_cmd *cmd)
1569 struct ccp_passthru_engine *pt = &cmd->u.passthru;
1570 struct ccp_dm_workarea mask;
1571 struct ccp_data src, dst;
1573 bool in_place = false;
1577 if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
1580 if (!pt->src || !pt->dst)
1583 if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1584 if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
1590 BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1);
1592 memset(&op, 0, sizeof(op));
1594 op.jobid = ccp_gen_jobid(cmd_q->ccp);
1596 if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1598 op.ksb_key = cmd_q->ksb_key;
1600 ret = ccp_init_dm_workarea(&mask, cmd_q,
1601 CCP_PASSTHRU_KSB_COUNT *
1607 ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
1608 ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key,
1609 CCP_PASSTHRU_BYTESWAP_NOOP);
1611 cmd->engine_error = cmd_q->cmd_error;
1616 /* Prepare the input and output data workareas. For in-place
1617 * operations we need to set the dma direction to BIDIRECTIONAL
1618 * and copy the src workarea to the dst workarea.
1620 if (sg_virt(pt->src) == sg_virt(pt->dst))
1623 ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len,
1624 CCP_PASSTHRU_MASKSIZE,
1625 in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1632 ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
1633 CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
1638 /* Send data to the CCP Passthru engine
1639 * Because the CCP engine works on a single source and destination
1640 * dma address at a time, each entry in the source scatterlist
1641 * (after the dma_map_sg call) must be less than or equal to the
1642 * (remaining) length in the destination scatterlist entry and the
1643 * length must be a multiple of CCP_PASSTHRU_BLOCKSIZE
1645 dst.sg_wa.sg_used = 0;
1646 for (i = 1; i <= src.sg_wa.dma_count; i++) {
1647 if (!dst.sg_wa.sg ||
1648 (dst.sg_wa.sg->length < src.sg_wa.sg->length)) {
1653 if (i == src.sg_wa.dma_count) {
1658 op.src.type = CCP_MEMTYPE_SYSTEM;
1659 op.src.u.dma.address = sg_dma_address(src.sg_wa.sg);
1660 op.src.u.dma.offset = 0;
1661 op.src.u.dma.length = sg_dma_len(src.sg_wa.sg);
1663 op.dst.type = CCP_MEMTYPE_SYSTEM;
1664 op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg);
1665 op.dst.u.dma.offset = dst.sg_wa.sg_used;
1666 op.dst.u.dma.length = op.src.u.dma.length;
1668 ret = ccp_perform_passthru(&op);
1670 cmd->engine_error = cmd_q->cmd_error;
1674 dst.sg_wa.sg_used += src.sg_wa.sg->length;
1675 if (dst.sg_wa.sg_used == dst.sg_wa.sg->length) {
1676 dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
1677 dst.sg_wa.sg_used = 0;
1679 src.sg_wa.sg = sg_next(src.sg_wa.sg);
1684 ccp_free_data(&dst, cmd_q);
1687 ccp_free_data(&src, cmd_q);
1690 if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
1696 static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1698 struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1699 struct ccp_dm_workarea src, dst;
1704 if (!ecc->u.mm.operand_1 ||
1705 (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
1708 if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)
1709 if (!ecc->u.mm.operand_2 ||
1710 (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
1713 if (!ecc->u.mm.result ||
1714 (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))
1717 memset(&op, 0, sizeof(op));
1719 op.jobid = ccp_gen_jobid(cmd_q->ccp);
1721 /* Concatenate the modulus and the operands. Both the modulus and
1722 * the operands must be in little endian format. Since the input
1723 * is in big endian format it must be converted and placed in a
1724 * fixed length buffer.
1726 ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
1731 /* Save the workarea address since it is updated in order to perform
1736 /* Copy the ECC modulus */
1737 ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
1738 CCP_ECC_OPERAND_SIZE, true);
1739 src.address += CCP_ECC_OPERAND_SIZE;
1741 /* Copy the first operand */
1742 ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_1,
1743 ecc->u.mm.operand_1_len,
1744 CCP_ECC_OPERAND_SIZE, true);
1745 src.address += CCP_ECC_OPERAND_SIZE;
1747 if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
1748 /* Copy the second operand */
1749 ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_2,
1750 ecc->u.mm.operand_2_len,
1751 CCP_ECC_OPERAND_SIZE, true);
1752 src.address += CCP_ECC_OPERAND_SIZE;
1755 /* Restore the workarea address */
1758 /* Prepare the output area for the operation */
1759 ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
1765 op.src.u.dma.address = src.dma.address;
1766 op.src.u.dma.offset = 0;
1767 op.src.u.dma.length = src.length;
1768 op.dst.u.dma.address = dst.dma.address;
1769 op.dst.u.dma.offset = 0;
1770 op.dst.u.dma.length = dst.length;
1772 op.u.ecc.function = cmd->u.ecc.function;
1774 ret = ccp_perform_ecc(&op);
1776 cmd->engine_error = cmd_q->cmd_error;
1780 ecc->ecc_result = le16_to_cpup(
1781 (const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
1782 if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
1787 /* Save the ECC result */
1788 ccp_reverse_get_dm_area(&dst, ecc->u.mm.result, CCP_ECC_MODULUS_BYTES);
1799 static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1801 struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1802 struct ccp_dm_workarea src, dst;
1807 if (!ecc->u.pm.point_1.x ||
1808 (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
1809 !ecc->u.pm.point_1.y ||
1810 (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
1813 if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
1814 if (!ecc->u.pm.point_2.x ||
1815 (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
1816 !ecc->u.pm.point_2.y ||
1817 (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
1820 if (!ecc->u.pm.domain_a ||
1821 (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
1824 if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
1825 if (!ecc->u.pm.scalar ||
1826 (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
1830 if (!ecc->u.pm.result.x ||
1831 (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
1832 !ecc->u.pm.result.y ||
1833 (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
1836 memset(&op, 0, sizeof(op));
1838 op.jobid = ccp_gen_jobid(cmd_q->ccp);
1840 /* Concatenate the modulus and the operands. Both the modulus and
1841 * the operands must be in little endian format. Since the input
1842 * is in big endian format it must be converted and placed in a
1843 * fixed length buffer.
1845 ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
1850 /* Save the workarea address since it is updated in order to perform
1855 /* Copy the ECC modulus */
1856 ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
1857 CCP_ECC_OPERAND_SIZE, true);
1858 src.address += CCP_ECC_OPERAND_SIZE;
1860 /* Copy the first point X and Y coordinate */
1861 ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.x,
1862 ecc->u.pm.point_1.x_len,
1863 CCP_ECC_OPERAND_SIZE, true);
1864 src.address += CCP_ECC_OPERAND_SIZE;
1865 ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.y,
1866 ecc->u.pm.point_1.y_len,
1867 CCP_ECC_OPERAND_SIZE, true);
1868 src.address += CCP_ECC_OPERAND_SIZE;
1870 /* Set the first point Z coordinate to 1 */
1871 *(src.address) = 0x01;
1872 src.address += CCP_ECC_OPERAND_SIZE;
1874 if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
1875 /* Copy the second point X and Y coordinate */
1876 ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.x,
1877 ecc->u.pm.point_2.x_len,
1878 CCP_ECC_OPERAND_SIZE, true);
1879 src.address += CCP_ECC_OPERAND_SIZE;
1880 ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.y,
1881 ecc->u.pm.point_2.y_len,
1882 CCP_ECC_OPERAND_SIZE, true);
1883 src.address += CCP_ECC_OPERAND_SIZE;
1885 /* Set the second point Z coordinate to 1 */
1886 *(src.address) = 0x01;
1887 src.address += CCP_ECC_OPERAND_SIZE;
1889 /* Copy the Domain "a" parameter */
1890 ccp_reverse_set_dm_area(&src, ecc->u.pm.domain_a,
1891 ecc->u.pm.domain_a_len,
1892 CCP_ECC_OPERAND_SIZE, true);
1893 src.address += CCP_ECC_OPERAND_SIZE;
1895 if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
1896 /* Copy the scalar value */
1897 ccp_reverse_set_dm_area(&src, ecc->u.pm.scalar,
1898 ecc->u.pm.scalar_len,
1899 CCP_ECC_OPERAND_SIZE, true);
1900 src.address += CCP_ECC_OPERAND_SIZE;
1904 /* Restore the workarea address */
1907 /* Prepare the output area for the operation */
1908 ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
1914 op.src.u.dma.address = src.dma.address;
1915 op.src.u.dma.offset = 0;
1916 op.src.u.dma.length = src.length;
1917 op.dst.u.dma.address = dst.dma.address;
1918 op.dst.u.dma.offset = 0;
1919 op.dst.u.dma.length = dst.length;
1921 op.u.ecc.function = cmd->u.ecc.function;
1923 ret = ccp_perform_ecc(&op);
1925 cmd->engine_error = cmd_q->cmd_error;
1929 ecc->ecc_result = le16_to_cpup(
1930 (const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
1931 if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
1936 /* Save the workarea address since it is updated as we walk through
1937 * to copy the point math result
1941 /* Save the ECC result X and Y coordinates */
1942 ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.x,
1943 CCP_ECC_MODULUS_BYTES);
1944 dst.address += CCP_ECC_OUTPUT_SIZE;
1945 ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.y,
1946 CCP_ECC_MODULUS_BYTES);
1947 dst.address += CCP_ECC_OUTPUT_SIZE;
1949 /* Restore the workarea address */
1961 static int ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1963 struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1965 ecc->ecc_result = 0;
1968 (ecc->mod_len > CCP_ECC_MODULUS_BYTES))
1971 switch (ecc->function) {
1972 case CCP_ECC_FUNCTION_MMUL_384BIT:
1973 case CCP_ECC_FUNCTION_MADD_384BIT:
1974 case CCP_ECC_FUNCTION_MINV_384BIT:
1975 return ccp_run_ecc_mm_cmd(cmd_q, cmd);
1977 case CCP_ECC_FUNCTION_PADD_384BIT:
1978 case CCP_ECC_FUNCTION_PMUL_384BIT:
1979 case CCP_ECC_FUNCTION_PDBL_384BIT:
1980 return ccp_run_ecc_pm_cmd(cmd_q, cmd);
1987 int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1991 cmd->engine_error = 0;
1992 cmd_q->cmd_error = 0;
1993 cmd_q->int_rcvd = 0;
1994 cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
1996 switch (cmd->engine) {
1997 case CCP_ENGINE_AES:
1998 ret = ccp_run_aes_cmd(cmd_q, cmd);
2000 case CCP_ENGINE_XTS_AES_128:
2001 ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
2003 case CCP_ENGINE_SHA:
2004 ret = ccp_run_sha_cmd(cmd_q, cmd);
2006 case CCP_ENGINE_RSA:
2007 ret = ccp_run_rsa_cmd(cmd_q, cmd);
2009 case CCP_ENGINE_PASSTHRU:
2010 ret = ccp_run_passthru_cmd(cmd_q, cmd);
2012 case CCP_ENGINE_ECC:
2013 ret = ccp_run_ecc_cmd(cmd_q, cmd);