2 * Copyright (c) 2009 Chris K Cockrum <ckc@cockrum.net>
4 * Copyright (c) 2013 Jens Trillmann <jtrillma@tzi.de>
5 * Copyright (c) 2013 Marc Müller-Weinhardt <muewei@tzi.de>
6 * Copyright (c) 2013 Lars Schmertmann <lars@tzi.de>
7 * Copyright (c) 2013 Hauke Mehrtens <hauke@hauke-m.de>
9 * Permission is hereby granted, free of charge, to any person obtaining a copy
10 * of this software and associated documentation files (the "Software"), to deal
11 * in the Software without restriction, including without limitation the rights
12 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 * copies of the Software, and to permit persons to whom the Software is
14 * furnished to do so, subject to the following conditions:
16 * The above copyright notice and this permission notice shall be included in
17 * all copies or substantial portions of the Software.
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
28 * This implementation is based in part on the paper Implementation of an
29 * Elliptic Curve Cryptosystem on an 8-bit Microcontroller [0] by
30 * Chris K Cockrum <ckc@cockrum.net>.
32 * [0]: http://cockrum.net/Implementation_of_ECC_on_an_8-bit_microcontroller.pdf
34 * This is a efficient ECC implementation on the secp256r1 curve for 32 Bit CPU
35 * architectures. It provides basic operations on the secp256r1 curve and support
39 //big number functions
43 static uint32_t add( const uint32_t *x, const uint32_t *y, uint32_t *result, uint8_t length){
44 uint64_t d = 0; //carry
46 for(v = 0;v<length;v++){
47 //printf("%02x + %02x + %01x = ", x[v], y[v], d);
48 d += (uint64_t) x[v] + (uint64_t) y[v];
49 //printf("%02x\n", d);
51 d = d>>32; //save carry
57 static uint32_t sub( const uint32_t *x, const uint32_t *y, uint32_t *result, uint8_t length){
60 for(v = 0;v < length; v++){
61 d = (uint64_t) x[v] - (uint64_t) y[v] - d;
62 result[v] = d & 0xFFFFFFFF;
69 static void rshiftby(const uint32_t *in, uint8_t in_size, uint32_t *out, uint8_t out_size, uint8_t shift) {
72 for (i = 0; i < (in_size - shift) && i < out_size; i++)
73 out[i] = in[i + shift];
74 for (/* reuse i */; i < out_size; i++)
78 //finite field functions
79 //FFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFF
80 static const uint32_t ecc_prime_m[8] = {0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
81 0x00000000, 0x00000000, 0x00000001, 0xffffffff};
84 /* This is added after an static byte addition if the answer has a carry in MSB*/
85 static const uint32_t ecc_prime_r[8] = {0x00000001, 0x00000000, 0x00000000, 0xffffffff,
86 0xffffffff, 0xffffffff, 0xfffffffe, 0x00000000};
88 // ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551
89 static const uint32_t ecc_order_m[9] = {0xFC632551, 0xF3B9CAC2, 0xA7179E84, 0xBCE6FAAD,
90 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF,
93 static const uint32_t ecc_order_r[8] = {0x039CDAAF, 0x0C46353D, 0x58E8617B, 0x43190552,
94 0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000};
96 static const uint32_t ecc_order_mu[9] = {0xEEDF9BFE, 0x012FFD85, 0xDF1A6C21, 0x43190552,
97 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0x00000000,
100 static const uint8_t ecc_order_k = 8;
102 const uint32_t ecc_g_point_x[8] = { 0xD898C296, 0xF4A13945, 0x2DEB33A0, 0x77037D81,
103 0x63A440F2, 0xF8BCE6E5, 0xE12C4247, 0x6B17D1F2};
104 const uint32_t ecc_g_point_y[8] = { 0x37BF51F5, 0xCBB64068, 0x6B315ECE, 0x2BCE3357,
105 0x7C0F9E16, 0x8EE7EB4A, 0xFE1A7F9B, 0x4FE342E2};
108 static void setZero(uint32_t *A, const int length){
109 memset(A, 0x0, length * sizeof(uint32_t));
113 * copy one array to another
115 static void copy(const uint32_t *from, uint32_t *to, uint8_t length){
116 memcpy(to, from, length * sizeof(uint32_t));
119 static int isSame(const uint32_t *A, const uint32_t *B, uint8_t length){
120 return !memcmp(A, B, length * sizeof(uint32_t));
123 //is A greater than B?
124 static int isGreater(const uint32_t *A, const uint32_t *B, uint8_t length){
126 for (i = length-1; i >= 0; --i)
137 static int fieldAdd(const uint32_t *x, const uint32_t *y, const uint32_t *reducer, uint32_t *result){
138 if(add(x, y, result, arrayLength)){ //add prime if carry is still set!
141 add(result, reducer, tempas, arrayLength);
142 copy(tempas, result, arrayLength);
147 static int fieldSub(const uint32_t *x, const uint32_t *y, const uint32_t *modulus, uint32_t *result){
148 if(sub(x, y, result, arrayLength)){ //add modulus if carry is set
151 add(result, modulus, tempas, arrayLength);
152 copy(tempas, result, arrayLength);
157 //finite Field multiplication
158 //32bit * 32bit = 64bit
159 static int fieldMult(const uint32_t *x, const uint32_t *y, uint32_t *result, uint8_t length){
160 uint32_t temp[length * 2];
161 setZero(temp, length * 2);
162 setZero(result, length * 2);
165 for (k = 0; k < length; k++){
166 for (n = 0; n < length; n++){
167 l = (uint64_t)x[n]*(uint64_t)y[k];
168 temp[n+k] = l&0xFFFFFFFF;
170 add(&temp[n+k], &result[n+k], &result[n+k], (length * 2) - (n + k));
172 setZero(temp, length * 2);
179 //fffffffe00000002fffffffe0000000100000001fffffffe00000001fffffffe00000001fffffffefffffffffffffffffffffffe000000000000000000000001_16
180 static void fieldModP(uint32_t *A, const uint32_t *B)
188 copy(B,A,arrayLength);
191 for(n=0;n<3;n++) tempm[n]=0;
192 for(n=3;n<8;n++) tempm[n]=B[n+8];
195 fieldAdd(A,tempm,ecc_prime_r,tempm2);
197 fieldAdd(tempm2,tempm,ecc_prime_r,A);
199 for(n=0;n<3;n++) tempm[n]=0;
200 for(n=3;n<7;n++) tempm[n]=B[n+9];
201 for(n=7;n<8;n++) tempm[n]=0;
202 /* tempm2=T+S1+S1+S2 */
203 fieldAdd(A,tempm,ecc_prime_r,tempm2);
204 /* A=T+S1+S1+S2+S2 */
205 fieldAdd(tempm2,tempm,ecc_prime_r,A);
207 for(n=0;n<3;n++) tempm[n]=B[n+8];
208 for(n=3;n<6;n++) tempm[n]=0;
209 for(n=6;n<8;n++) tempm[n]=B[n+8];
210 /* tempm2=T+S1+S1+S2+S2+S3 */
211 fieldAdd(A,tempm,ecc_prime_r,tempm2);
213 for(n=0;n<3;n++) tempm[n]=B[n+9];
214 for(n=3;n<6;n++) tempm[n]=B[n+10];
215 for(n=6;n<7;n++) tempm[n]=B[n+7];
216 for(n=7;n<8;n++) tempm[n]=B[n+1];
217 /* A=T+S1+S1+S2+S2+S3+S4 */
218 fieldAdd(tempm2,tempm,ecc_prime_r,A);
220 for(n=0;n<3;n++) tempm[n]=B[n+11];
221 for(n=3;n<6;n++) tempm[n]=0;
222 for(n=6;n<7;n++) tempm[n]=B[n+2];
223 for(n=7;n<8;n++) tempm[n]=B[n+3];
224 /* tempm2=T+S1+S1+S2+S2+S3+S4-D1 */
225 fieldSub(A,tempm,ecc_prime_m,tempm2);
227 for(n=0;n<4;n++) tempm[n]=B[n+12];
228 for(n=4;n<6;n++) tempm[n]=0;
229 for(n=6;n<7;n++) tempm[n]=B[n+3];
230 for(n=7;n<8;n++) tempm[n]=B[n+4];
231 /* A=T+S1+S1+S2+S2+S3+S4-D1-D2 */
232 fieldSub(tempm2,tempm,ecc_prime_m,A);
234 for(n=0;n<3;n++) tempm[n]=B[n+13];
235 for(n=3;n<6;n++) tempm[n]=B[n+5];
236 for(n=6;n<7;n++) tempm[n]=0;
237 for(n=7;n<8;n++) tempm[n]=B[n+5];
238 /* tempm2=T+S1+S1+S2+S2+S3+S4-D1-D2-D3 */
239 fieldSub(A,tempm,ecc_prime_m,tempm2);
241 for(n=0;n<2;n++) tempm[n]=B[n+14];
242 for(n=2;n<3;n++) tempm[n]=0;
243 for(n=3;n<6;n++) tempm[n]=B[n+6];
244 for(n=6;n<7;n++) tempm[n]=0;
245 for(n=7;n<8;n++) tempm[n]=B[n+6];
246 /* A=T+S1+S1+S2+S2+S3+S4-D1-D2-D3-D4 */
247 fieldSub(tempm2,tempm,ecc_prime_m,A);
248 if(isGreater(A, ecc_prime_m, arrayLength) >= 0){
249 fieldSub(A, ecc_prime_m, ecc_prime_m, tempm);
250 copy(tempm, A, arrayLength);
255 * calculate the result = A mod n.
256 * n is the order of the eliptic curve.
257 * A and result could point to the same value
259 * A: input value (max size * 4 bytes)
260 * result: result of modulo calculation (max 36 bytes)
263 * This uses the Barrett modular reduction as described in the Handbook
264 * of Applied Cryptography 14.42 Algorithm Barrett modular reduction,
265 * see http://cacr.uwaterloo.ca/hac/about/chap14.pdf and
266 * http://everything2.com/title/Barrett+Reduction
268 * b = 32 (bite size of the processor architecture)
269 * mu (ecc_order_mu) was precomputed in a java program
271 static void fieldModO(const uint32_t *A, uint32_t *result, uint8_t length) {
272 // This is used for value q1 and q3
274 // This is used for q2 and a temp var
277 // return if the given value is smaller than the modulus
278 if (length == arrayLength && isGreater(A, ecc_order_m, arrayLength) <= 0) {
280 copy(A, result, length);
284 rshiftby(A, length, q1_q3, 9, ecc_order_k - 1);
286 fieldMult(ecc_order_mu, q1_q3, q2_tmp, 9);
288 rshiftby(q2_tmp, 18, q1_q3, 8, ecc_order_k + 1);
290 // r1 = first 9 blocks of A
292 fieldMult(q1_q3, ecc_order_m, q2_tmp, 8);
294 // r2 = first 9 blocks of q2_tmp
296 sub(A, q2_tmp, result, 9);
298 while (isGreater(result, ecc_order_m, 9) >= 0)
299 sub(result, ecc_order_m, result, 9);
302 static int isOne(const uint32_t* A){
308 if ((n==8)&&(A[0]==1))
314 static int isZero(const uint32_t* A){
322 static void rshift(uint32_t* A){
328 A[i] = A[i]>>1 | nOld<<31;
333 static int fieldAddAndDivide(const uint32_t *x, const uint32_t *modulus, const uint32_t *reducer, uint32_t* result){
334 uint32_t n = add(x, modulus, result, arrayLength);
336 if(n){ //add prime if carry is still set!
337 result[7] |= 0x80000000;//add the carry
338 if (isGreater(result, modulus, arrayLength) == 1)
342 add(result, reducer, tempas, 8);
343 copy(tempas, result, arrayLength);
351 * Inverse A and output to B
353 static void fieldInv(const uint32_t *A, const uint32_t *modulus, const uint32_t *reducer, uint32_t *B){
354 uint32_t u[8],v[8],x1[8],x2[8];
363 copy(A,u,arrayLength);
364 copy(modulus,v,arrayLength);
368 /* While u !=1 and v !=1 */
369 while ((isOne(u) || isOne(v))==0) {
370 while(!(u[0]&1)) { /* While u is even */
371 rshift(u); /* divide by 2 */
372 if (!(x1[0]&1)) /*ifx1iseven*/
373 rshift(x1); /* Divide by 2 */
375 fieldAddAndDivide(x1,modulus,reducer,tempm); /* tempm=x1+p */
376 copy(tempm,x1,arrayLength); /* x1=tempm */
377 //rshift(x1); /* Divide by 2 */
380 while(!(v[0]&1)) { /* While v is even */
381 rshift(v); /* divide by 2 */
382 if (!(x2[0]&1)) /*ifx1iseven*/
383 rshift(x2); /* Divide by 2 */
386 fieldAddAndDivide(x2,modulus,reducer,tempm); /* tempm=x1+p */
387 copy(tempm,x2,arrayLength); /* x1=tempm */
388 //rshift(x2); /* Divide by 2 */
392 t=sub(u,v,tempm,arrayLength); /* tempm=u-v */
393 if (t==0) { /* If u > 0 */
394 copy(tempm,u,arrayLength); /* u=u-v */
395 fieldSub(x1,x2,modulus,tempm); /* tempm=x1-x2 */
396 copy(tempm,x1,arrayLength); /* x1=x1-x2 */
398 sub(v,u,tempm,arrayLength); /* tempm=v-u */
399 copy(tempm,v,arrayLength); /* v=v-u */
400 fieldSub(x2,x1,modulus,tempm); /* tempm=x2-x1 */
401 copy(tempm,x2,arrayLength); /* x2=x2-x1 */
405 copy(x1,B,arrayLength);
407 copy(x2,B,arrayLength);
411 void static ec_double(const uint32_t *px, const uint32_t *py, uint32_t *Dx, uint32_t *Dy){
417 if(isZero(px) && isZero(py)){
418 copy(px, Dx,arrayLength);
419 copy(py, Dy,arrayLength);
423 fieldMult(px, px, tempD, arrayLength);
424 fieldModP(tempA, tempD);
426 tempB[0] = 0x00000001;
427 fieldSub(tempA, tempB, ecc_prime_m, tempC); //tempC = (qx^2-1)
428 tempB[0] = 0x00000003;
429 fieldMult(tempC, tempB, tempD, arrayLength);
430 fieldModP(tempA, tempD);//tempA = 3*(qx^2-1)
431 fieldAdd(py, py, ecc_prime_r, tempB); //tempB = 2*qy
432 fieldInv(tempB, ecc_prime_m, ecc_prime_r, tempC); //tempC = 1/(2*qy)
433 fieldMult(tempA, tempC, tempD, arrayLength); //tempB = lambda = (3*(qx^2-1))/(2*qy)
434 fieldModP(tempB, tempD);
436 fieldMult(tempB, tempB, tempD, arrayLength); //tempC = lambda^2
437 fieldModP(tempC, tempD);
438 fieldSub(tempC, px, ecc_prime_m, tempA); //lambda^2 - Px
439 fieldSub(tempA, px, ecc_prime_m, Dx); //lambda^2 - Px - Qx
441 fieldSub(px, Dx, ecc_prime_m, tempA); //tempA = qx-dx
442 fieldMult(tempB, tempA, tempD, arrayLength); //tempC = lambda * (qx-dx)
443 fieldModP(tempC, tempD);
444 fieldSub(tempC, py, ecc_prime_m, Dy); //Dy = lambda * (qx-dx) - px
447 void static ec_add(const uint32_t *px, const uint32_t *py, const uint32_t *qx, const uint32_t *qy, uint32_t *Sx, uint32_t *Sy){
453 if(isZero(px) && isZero(py)){
454 copy(qx, Sx,arrayLength);
455 copy(qy, Sy,arrayLength);
457 } else if(isZero(qx) && isZero(qy)) {
458 copy(px, Sx,arrayLength);
459 copy(py, Sy,arrayLength);
463 if(isSame(px, qx, arrayLength)){
464 if(!isSame(py, qy, arrayLength)){
469 ec_double(px, py, Sx, Sy);
474 fieldSub(py, qy, ecc_prime_m, tempA);
475 fieldSub(px, qx, ecc_prime_m, tempB);
476 fieldInv(tempB, ecc_prime_m, ecc_prime_r, tempB);
477 fieldMult(tempA, tempB, tempD, arrayLength);
478 fieldModP(tempC, tempD); //tempC = lambda
480 fieldMult(tempC, tempC, tempD, arrayLength); //tempA = lambda^2
481 fieldModP(tempA, tempD);
482 fieldSub(tempA, px, ecc_prime_m, tempB); //lambda^2 - Px
483 fieldSub(tempB, qx, ecc_prime_m, Sx); //lambda^2 - Px - Qx
485 fieldSub(qx, Sx, ecc_prime_m, tempB);
486 fieldMult(tempC, tempB, tempD, arrayLength);
487 fieldModP(tempC, tempD);
488 fieldSub(tempC, qy, ecc_prime_m, Sy);
491 void ecc_ec_mult(const uint32_t *px, const uint32_t *py, const uint32_t *secret, uint32_t *resultx, uint32_t *resulty){
502 ec_double(Qx, Qy, tempx, tempy);
503 copy(tempx, Qx,arrayLength);
504 copy(tempy, Qy,arrayLength);
505 if (((secret[i / 32]) & ((uint32_t)1 << (i % 32)))) {
506 ec_add(Qx, Qy, px, py, tempx, tempy); //eccAdd
507 copy(tempx, Qx,arrayLength);
508 copy(tempy, Qy,arrayLength);
511 copy(Qx, resultx,arrayLength);
512 copy(Qy, resulty,arrayLength);
516 * Calculate the ecdsa signature.
518 * For a description of this algorithm see
519 * https://en.wikipedia.org/wiki/Elliptic_Curve_DSA#Signature_generation_algorithm
522 * d: private key on the curve secp256r1 (32 bytes)
523 * e: hash to sign (32 bytes)
524 * k: random data, this must be changed for every signature (32 bytes)
527 * r: r value of the signature (36 bytes)
528 * s: s value of the signature (36 bytes)
531 * 0: everything is ok
532 * -1: can not create signature, try again with different k.
534 int ecc_ecdsa_sign(const uint32_t *d, const uint32_t *e, const uint32_t *k, uint32_t *r, uint32_t *s)
543 // 4. Calculate the curve point (x_1, y_1) = k * G.
544 ecc_ec_mult(ecc_g_point_x, ecc_g_point_y, k, r, tmp1);
546 // 5. Calculate r = x_1 \pmod{n}.
549 // 5. If r = 0, go back to step 3.
553 // 6. Calculate s = k^{-1}(z + r d_A) \pmod{n}.
555 fieldMult(r, d, tmp1, arrayLength);
556 fieldModO(tmp1, tmp2, 16);
559 tmp1[8] = add(e, tmp2, tmp1, 8);
560 fieldModO(tmp1, tmp3, 9);
563 fieldInv(k, ecc_order_m, ecc_order_r, tmp2);
565 // 6. (k^{-1}) (z + (r d))
566 fieldMult(tmp2, tmp3, tmp1, arrayLength);
567 fieldModO(tmp1, s, 16);
569 // 6. If s = 0, go back to step 3.
577 * Verifies a ecdsa signature.
579 * For a description of this algorithm see
580 * https://en.wikipedia.org/wiki/Elliptic_Curve_DSA#Signature_verification_algorithm
583 * x: x coordinate of the public key (32 bytes)
584 * y: y coordinate of the public key (32 bytes)
585 * e: hash to verify the signature of (32 bytes)
586 * r: r value of the signature (32 bytes)
587 * s: s value of the signature (32 bytes)
591 * -1: signature check failed the signature is invalid
593 int ecc_ecdsa_validate(const uint32_t *x, const uint32_t *y, const uint32_t *e, const uint32_t *r, const uint32_t *s)
606 // 3. Calculate w = s^{-1} \pmod{n}
607 fieldInv(s, ecc_order_m, ecc_order_r, w);
609 // 4. Calculate u_1 = zw \pmod{n}
610 fieldMult(e, w, tmp, arrayLength);
611 fieldModO(tmp, u1, 16);
613 // 4. Calculate u_2 = rw \pmod{n}
614 fieldMult(r, w, tmp, arrayLength);
615 fieldModO(tmp, u2, 16);
617 // 5. Calculate the curve point (x_1, y_1) = u_1 * G + u_2 * Q_A.
619 ecc_ec_mult(ecc_g_point_x, ecc_g_point_y, u1, tmp1_x, tmp1_y);
622 ecc_ec_mult(x, y, u2, tmp2_x, tmp2_y);
624 // tmp3 = tmp1 + tmp2
625 ec_add(tmp1_x, tmp1_y, tmp2_x, tmp2_y, tmp3_x, tmp3_y);
626 // TODO: this u_1 * G + u_2 * Q_A could be optimiced with Straus's algorithm.
628 return isSame(tmp3_x, r, arrayLength) ? 0 : -1;
631 int ecc_is_valid_key(const uint32_t * priv_key)
633 return isGreater(ecc_order_m, priv_key, arrayLength) == 1;
637 * This exports the low level functions so the tests can use them.
638 * In real use the compiler is now bale to optimice the code better.
641 uint32_t ecc_add( const uint32_t *x, const uint32_t *y, uint32_t *result, uint8_t length)
643 return add(x, y, result, length);
645 uint32_t ecc_sub( const uint32_t *x, const uint32_t *y, uint32_t *result, uint8_t length)
647 return sub(x, y, result, length);
649 int ecc_fieldAdd(const uint32_t *x, const uint32_t *y, const uint32_t *reducer, uint32_t *result)
651 return fieldAdd(x, y, reducer, result);
653 int ecc_fieldSub(const uint32_t *x, const uint32_t *y, const uint32_t *modulus, uint32_t *result)
655 return fieldSub(x, y, modulus, result);
657 int ecc_fieldMult(const uint32_t *x, const uint32_t *y, uint32_t *result, uint8_t length)
659 return fieldMult(x, y, result, length);
661 void ecc_fieldModP(uint32_t *A, const uint32_t *B)
665 void ecc_fieldModO(const uint32_t *A, uint32_t *result, uint8_t length)
667 fieldModO(A, result, length);
669 void ecc_fieldInv(const uint32_t *A, const uint32_t *modulus, const uint32_t *reducer, uint32_t *B)
671 fieldInv(A, modulus, reducer, B);
673 void ecc_copy(const uint32_t *from, uint32_t *to, uint8_t length)
675 copy(from, to, length);
677 int ecc_isSame(const uint32_t *A, const uint32_t *B, uint8_t length)
679 return isSame(A, B, length);
681 void ecc_setZero(uint32_t *A, const int length)
685 int ecc_isOne(const uint32_t* A)
689 void ecc_rshift(uint32_t* A)
693 int ecc_isGreater(const uint32_t *A, const uint32_t *B, uint8_t length)
695 return isGreater(A, B , length);
698 void ecc_ec_add(const uint32_t *px, const uint32_t *py, const uint32_t *qx, const uint32_t *qy, uint32_t *Sx, uint32_t *Sy)
700 ec_add(px, py, qx, qy, Sx, Sy);
702 void ecc_ec_double(const uint32_t *px, const uint32_t *py, uint32_t *Dx, uint32_t *Dy)
704 ec_double(px, py, Dx, Dy);
707 #endif /* TEST_INCLUDE */