/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
 * All rights reserved.
 *
 * This package is an SSL implementation written
 * by Eric Young (eay@cryptsoft.com).
 * The implementation was written so as to conform with Netscapes SSL.
 *
 * This library is free for commercial and non-commercial use as long as
 * the following conditions are aheared to.  The following conditions
 * apply to all code found in this distribution, be it the RC4, RSA,
 * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
 * included with this distribution is covered by the same copyright terms
 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
 *
 * Copyright remains Eric Young's, and as such any Copyright notices in
 * the code are not to be removed.
 * If this package is used in a product, Eric Young should be given attribution
 * as the author of the parts of the library used.
 * This can be in the form of a textual message at program startup or
 * in documentation (online or textual) provided with the package.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *    "This product includes cryptographic software written by
 *     Eric Young (eay@cryptsoft.com)"
 *    The word 'cryptographic' can be left out if the rouines from the library
 *    being used are not cryptographic related :-).
 * 4. If you include any Windows specific code (or a derivative thereof) from
 *    the apps directory (application code) you must include an acknowledgement:
 *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
 *
 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * The licence and distribution terms for any publically available version or
 * derivative of this code cannot be changed.  i.e. this code cannot simply be
 * copied and put under another distribution licence
 * [including the GNU Public Licence.] */

#include <openssl/sha.h>

#include <string.h>

#include <openssl/mem.h>

#include "../internal.h"

/* IMPLEMENTATION NOTES.
 *
 * As you might have noticed, the 32-bit hash algorithms:
 *
 * - permit SHA_LONG to be wider than 32 bits (the case on CRAY);
 * - have optimized versions that implement two transform functions: one
 *   operating on [aligned] data in host byte order and one on data in input
 *   stream byte order;
 * - share a common byte-order-neutral collector and padding function
 *   implementation, ../md32_common.h.
 *
 * None of the above applies to this SHA-512 implementation. The reasons
 * [in reverse order] are:
 *
 * - it's the only 64-bit hash algorithm at the moment of this writing, so
 *   there is no need for a common collector/padding implementation [yet];
 * - by supporting only one transform function [which operates on
 *   *aligned* data in input stream byte order, big-endian in this case]
 *   we minimize the maintenance burden in two ways: a) the collector/padding
 *   function is simpler; b) there is only one transform function to stare at;
 * - SHA_LONG64 is required to be exactly 64 bits wide, which makes it
 *   possible to apply a number of optimizations that mitigate the potential
 *   performance penalties caused by the previous design decision. */

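/* As an illustration of "input stream byte order" (a hedged sketch, not code
 * used by this file): each 64-bit message word is read big-endian regardless
 * of host endianness, conceptually like the helper below. The PULL64 macro
 * further down implements this with platform-specific optimizations where
 * available.
 *
 *   static uint64_t be64_load(const uint8_t *p) {
 *     return ((uint64_t)p[0] << 56) | ((uint64_t)p[1] << 48) |
 *            ((uint64_t)p[2] << 40) | ((uint64_t)p[3] << 32) |
 *            ((uint64_t)p[4] << 24) | ((uint64_t)p[5] << 16) |
 *            ((uint64_t)p[6] << 8) | (uint64_t)p[7];
 *   }
 */
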
#if !defined(OPENSSL_NO_ASM) && \
    (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || defined(OPENSSL_ARM))
#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
#endif

int SHA384_Init(SHA512_CTX *sha) {
  sha->h[0] = OPENSSL_U64(0xcbbb9d5dc1059ed8);
  sha->h[1] = OPENSSL_U64(0x629a292a367cd507);
  sha->h[2] = OPENSSL_U64(0x9159015a3070dd17);
  sha->h[3] = OPENSSL_U64(0x152fecd8f70e5939);
  sha->h[4] = OPENSSL_U64(0x67332667ffc00b31);
  sha->h[5] = OPENSSL_U64(0x8eb44a8768581511);
  sha->h[6] = OPENSSL_U64(0xdb0c2e0d64f98fa7);
  sha->h[7] = OPENSSL_U64(0x47b5481dbefa4fa4);

  sha->Nl = 0;
  sha->Nh = 0;
  sha->num = 0;
  sha->md_len = SHA384_DIGEST_LENGTH;
  return 1;
}

int SHA512_Init(SHA512_CTX *sha) {
  sha->h[0] = OPENSSL_U64(0x6a09e667f3bcc908);
  sha->h[1] = OPENSSL_U64(0xbb67ae8584caa73b);
  sha->h[2] = OPENSSL_U64(0x3c6ef372fe94f82b);
  sha->h[3] = OPENSSL_U64(0xa54ff53a5f1d36f1);
  sha->h[4] = OPENSSL_U64(0x510e527fade682d1);
  sha->h[5] = OPENSSL_U64(0x9b05688c2b3e6c1f);
  sha->h[6] = OPENSSL_U64(0x1f83d9abfb41bd6b);
  sha->h[7] = OPENSSL_U64(0x5be0cd19137e2179);

  sha->Nl = 0;
  sha->Nh = 0;
  sha->num = 0;
  sha->md_len = SHA512_DIGEST_LENGTH;
  return 1;
}

uint8_t *SHA384(const uint8_t *data, size_t len, uint8_t *out) {
  SHA512_CTX ctx;
  static uint8_t buf[SHA384_DIGEST_LENGTH];

  /* TODO(fork): remove this static buffer. */
  if (out == NULL) {
    out = buf;
  }

  SHA384_Init(&ctx);
  SHA512_Update(&ctx, data, len);
  SHA512_Final(out, &ctx);
  OPENSSL_cleanse(&ctx, sizeof(ctx));
  return out;
}

uint8_t *SHA512(const uint8_t *data, size_t len, uint8_t *out) {
  SHA512_CTX ctx;
  static uint8_t buf[SHA512_DIGEST_LENGTH];

  /* TODO(fork): remove this static buffer. */
  if (out == NULL) {
    out = buf;
  }

  SHA512_Init(&ctx);
  SHA512_Update(&ctx, data, len);
  SHA512_Final(out, &ctx);
  OPENSSL_cleanse(&ctx, sizeof(ctx));
  return out;
}

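/* Example use of the one-shot API (an illustrative sketch; |msg| and |digest|
 * are hypothetical caller-side names, not part of this file):
 *
 *   static const uint8_t msg[] = {'a', 'b', 'c'};
 *   uint8_t digest[SHA512_DIGEST_LENGTH];
 *   SHA512(msg, sizeof(msg), digest);
 *
 * Passing a non-NULL |out| avoids the static fallback buffer above, which is
 * not thread-safe. */
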
#if !defined(SHA512_ASM)
static
#endif
void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);

int SHA384_Final(unsigned char *md, SHA512_CTX *sha) {
  return SHA512_Final(md, sha);
}

int SHA384_Update(SHA512_CTX *sha, const void *data, size_t len) {
  return SHA512_Update(sha, data, len);
}

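/* Example use of the streaming API (an illustrative sketch; |part1|, |part2|
 * and |digest| are hypothetical caller-side names): Init/Update/Final hash
 * data incrementally and produce the same result as the one-shot functions
 * above.
 *
 *   SHA512_CTX ctx;
 *   uint8_t digest[SHA384_DIGEST_LENGTH];
 *   SHA384_Init(&ctx);
 *   SHA384_Update(&ctx, part1, part1_len);
 *   SHA384_Update(&ctx, part2, part2_len);
 *   SHA384_Final(digest, &ctx);
 */
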
void SHA512_Transform(SHA512_CTX *c, const unsigned char *data) {
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
  if ((size_t)data % sizeof(c->u.d[0]) != 0) {
    /* The transform requires aligned input; bounce through the context's
     * block buffer. */
    memcpy(c->u.p, data, sizeof(c->u.p));
    data = c->u.p;
  }
#endif
  sha512_block_data_order(c, data, 1);
}

int SHA512_Update(SHA512_CTX *c, const void *in_data, size_t len) {
  uint64_t l;
  uint8_t *p = c->u.p;
  const uint8_t *data = (const uint8_t *)in_data;

  if (len == 0) {
    return 1;
  }

  /* Update the 128-bit bit count, kept as two 64-bit halves. */
  l = (c->Nl + (((uint64_t)len) << 3)) & OPENSSL_U64(0xffffffffffffffff);
  if (l < c->Nl) {
    c->Nh++;
  }
  if (sizeof(len) >= 8) {
    c->Nh += (((uint64_t)len) >> 61);
  }
  c->Nl = l;

  if (c->num != 0) {
    /* Top up the partially filled block buffer first. */
    size_t n = sizeof(c->u) - c->num;

    if (len < n) {
      memcpy(p + c->num, data, len);
      c->num += (unsigned int)len;
      return 1;
    } else {
      memcpy(p + c->num, data, n);
      c->num = 0;
      len -= n;
      data += n;
      sha512_block_data_order(c, p, 1);
    }
  }

  if (len >= sizeof(c->u)) {
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
    if ((size_t)data % sizeof(c->u.d[0]) != 0) {
      /* Unaligned input: process one block at a time via the aligned
       * context buffer. */
      while (len >= sizeof(c->u)) {
        memcpy(p, data, sizeof(c->u));
        sha512_block_data_order(c, p, 1);
        len -= sizeof(c->u);
        data += sizeof(c->u);
      }
    } else
#endif
    {
      sha512_block_data_order(c, data, len / sizeof(c->u));
      data += len;
      len %= sizeof(c->u);
      data -= len;
    }
  }

  if (len != 0) {
    /* Stash the trailing partial block for the next call. */
    memcpy(p, data, len);
    c->num = (unsigned int)len;
  }

  return 1;
}

int SHA512_Final(unsigned char *md, SHA512_CTX *sha) {
  uint8_t *p = (uint8_t *)sha->u.p;
  size_t n = sha->num;

  p[n] = 0x80; /* There is always room for one padding byte. */
  n++;
  if (n > (sizeof(sha->u) - 16)) {
    /* No room left for the 128-bit length; pad out this block and start a
     * fresh one. */
    memset(p + n, 0, sizeof(sha->u) - n);
    n = 0;
    sha512_block_data_order(sha, p, 1);
  }

  /* Zero-fill, then store the 128-bit bit count big-endian in the last 16
   * bytes of the block. */
  memset(p + n, 0, sizeof(sha->u) - 16 - n);
  p[sizeof(sha->u) - 1] = (uint8_t)(sha->Nl);
  p[sizeof(sha->u) - 2] = (uint8_t)(sha->Nl >> 8);
  p[sizeof(sha->u) - 3] = (uint8_t)(sha->Nl >> 16);
  p[sizeof(sha->u) - 4] = (uint8_t)(sha->Nl >> 24);
  p[sizeof(sha->u) - 5] = (uint8_t)(sha->Nl >> 32);
  p[sizeof(sha->u) - 6] = (uint8_t)(sha->Nl >> 40);
  p[sizeof(sha->u) - 7] = (uint8_t)(sha->Nl >> 48);
  p[sizeof(sha->u) - 8] = (uint8_t)(sha->Nl >> 56);
  p[sizeof(sha->u) - 9] = (uint8_t)(sha->Nh);
  p[sizeof(sha->u) - 10] = (uint8_t)(sha->Nh >> 8);
  p[sizeof(sha->u) - 11] = (uint8_t)(sha->Nh >> 16);
  p[sizeof(sha->u) - 12] = (uint8_t)(sha->Nh >> 24);
  p[sizeof(sha->u) - 13] = (uint8_t)(sha->Nh >> 32);
  p[sizeof(sha->u) - 14] = (uint8_t)(sha->Nh >> 40);
  p[sizeof(sha->u) - 15] = (uint8_t)(sha->Nh >> 48);
  p[sizeof(sha->u) - 16] = (uint8_t)(sha->Nh >> 56);

  sha512_block_data_order(sha, p, 1);

  if (md == NULL) {
    return 0;
  }

  switch (sha->md_len) {
    /* Let the compiler decide if it's appropriate to unroll... */
    case SHA384_DIGEST_LENGTH:
      for (n = 0; n < SHA384_DIGEST_LENGTH / 8; n++) {
        uint64_t t = sha->h[n];

        *(md++) = (uint8_t)(t >> 56);
        *(md++) = (uint8_t)(t >> 48);
        *(md++) = (uint8_t)(t >> 40);
        *(md++) = (uint8_t)(t >> 32);
        *(md++) = (uint8_t)(t >> 24);
        *(md++) = (uint8_t)(t >> 16);
        *(md++) = (uint8_t)(t >> 8);
        *(md++) = (uint8_t)(t);
      }
      break;
    case SHA512_DIGEST_LENGTH:
      for (n = 0; n < SHA512_DIGEST_LENGTH / 8; n++) {
        uint64_t t = sha->h[n];

        *(md++) = (uint8_t)(t >> 56);
        *(md++) = (uint8_t)(t >> 48);
        *(md++) = (uint8_t)(t >> 40);
        *(md++) = (uint8_t)(t >> 32);
        *(md++) = (uint8_t)(t >> 24);
        *(md++) = (uint8_t)(t >> 16);
        *(md++) = (uint8_t)(t >> 8);
        *(md++) = (uint8_t)(t);
      }
      break;
    /* ... as well as make sure md_len is not abused. */
    default:
      return 0;
  }

  return 1;
}

#ifndef SHA512_ASM
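/* K512 holds the SHA-512 round constants from FIPS 180-4: the first 64 bits
 * of the fractional parts of the cube roots of the first eighty primes. */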
static const uint64_t K512[80] = {
    0x428a2f98d728ae22, 0x7137449123ef65cd, 0xb5c0fbcfec4d3b2f,
    0xe9b5dba58189dbbc, 0x3956c25bf348b538, 0x59f111f1b605d019,
    0x923f82a4af194f9b, 0xab1c5ed5da6d8118, 0xd807aa98a3030242,
    0x12835b0145706fbe, 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
    0x72be5d74f27b896f, 0x80deb1fe3b1696b1, 0x9bdc06a725c71235,
    0xc19bf174cf692694, 0xe49b69c19ef14ad2, 0xefbe4786384f25e3,
    0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65, 0x2de92c6f592b0275,
    0x4a7484aa6ea6e483, 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
    0x983e5152ee66dfab, 0xa831c66d2db43210, 0xb00327c898fb213f,
    0xbf597fc7beef0ee4, 0xc6e00bf33da88fc2, 0xd5a79147930aa725,
    0x06ca6351e003826f, 0x142929670a0e6e70, 0x27b70a8546d22ffc,
    0x2e1b21385c26c926, 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
    0x650a73548baf63de, 0x766a0abb3c77b2a8, 0x81c2c92e47edaee6,
    0x92722c851482353b, 0xa2bfe8a14cf10364, 0xa81a664bbc423001,
    0xc24b8b70d0f89791, 0xc76c51a30654be30, 0xd192e819d6ef5218,
    0xd69906245565a910, 0xf40e35855771202a, 0x106aa07032bbd1b8,
    0x19a4c116b8d2d0c8, 0x1e376c085141ab53, 0x2748774cdf8eeb99,
    0x34b0bcb5e19b48a8, 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb,
    0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3, 0x748f82ee5defb2fc,
    0x78a5636f43172f60, 0x84c87814a1f0ab72, 0x8cc702081a6439ec,
    0x90befffa23631e28, 0xa4506cebde82bde9, 0xbef9a3f7b2c67915,
    0xc67178f2e372532b, 0xca273eceea26619c, 0xd186b8c721c0c207,
    0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178, 0x06f067aa72176fba,
    0x0a637dc5a2c898a6, 0x113f9804bef90dae, 0x1b710b35131c471b,
    0x28db77f523047d84, 0x32caab7b40c72493, 0x3c9ebe0a15c9bebc,
    0x431d67c49c100d4c, 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a,
    0x5fcb6fab3ad6faec, 0x6c44198c4a475817};

#if defined(__GNUC__) && __GNUC__ >= 2 && !defined(OPENSSL_NO_ASM)
#if defined(__x86_64) || defined(__x86_64__)
#define ROTR(a, n)                                         \
  ({                                                       \
    uint64_t ret;                                          \
    asm("rorq %1,%0" : "=r"(ret) : "J"(n), "0"(a) : "cc"); \
    ret;                                                   \
  })
#define PULL64(x)                               \
  ({                                            \
    uint64_t ret = *((const uint64_t *)(&(x))); \
    asm("bswapq %0" : "=r"(ret) : "0"(ret));    \
    ret;                                        \
  })
#elif (defined(__i386) || defined(__i386__))
#define PULL64(x)                                                         \
  ({                                                                      \
    const unsigned int *p = (const unsigned int *)(&(x));                 \
    unsigned int hi = p[0], lo = p[1];                                    \
    asm("bswapl %0; bswapl %1;" : "=r"(lo), "=r"(hi) : "0"(lo), "1"(hi)); \
    ((uint64_t)hi) << 32 | lo;                                            \
  })
#elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
#define ROTR(a, n)                                       \
  ({                                                     \
    uint64_t ret;                                        \
    asm("rotrdi %0,%1,%2" : "=r"(ret) : "r"(a), "K"(n)); \
    ret;                                                 \
  })
#elif defined(__aarch64__)
#define ROTR(a, n)                                    \
  ({                                                  \
    uint64_t ret;                                     \
    asm("ror %0,%1,%2" : "=r"(ret) : "r"(a), "I"(n)); \
    ret;                                              \
  })
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
    __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define PULL64(x)                                                    \
  ({                                                                 \
    uint64_t ret;                                                    \
    asm("rev %0,%1" : "=r"(ret) : "r"(*((const uint64_t *)(&(x))))); \
    ret;                                                             \
  })
#endif
#endif
#elif defined(_MSC_VER)
#if defined(_WIN64) /* applies to both IA-64 and AMD64 */
#pragma intrinsic(_rotr64)
#define ROTR(a, n) _rotr64((a), n)
#endif
#if defined(_M_IX86) && !defined(OPENSSL_NO_ASM)
static uint64_t __fastcall __pull64be(const void *x) {
  /* The 64-bit result is returned in edx:eax per the compiler's convention
   * for 64-bit returns on x86. */
  _asm mov edx, [ecx + 0]
  _asm mov eax, [ecx + 4]
  _asm bswap edx
  _asm bswap eax
}
#define PULL64(x) __pull64be(&(x))
#if _MSC_VER <= 1200
#pragma inline_depth(0)
#endif
#endif
#endif

#ifndef PULL64
#define B(x, j) \
  (((uint64_t)(*(((const unsigned char *)(&x)) + j))) << ((7 - j) * 8))
#define PULL64(x)                                                        \
  (B(x, 0) | B(x, 1) | B(x, 2) | B(x, 3) | B(x, 4) | B(x, 5) | B(x, 6) | \
   B(x, 7))
#endif

#ifndef ROTR
#define ROTR(x, s) (((x) >> s) | (x) << (64 - s))
#endif

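/* For example, ROTR(OPENSSL_U64(0x0123456789abcdef), 8) is
 * 0xef0123456789abcd: the low eight bits rotate around to the top. PULL64
 * reads a 64-bit big-endian word from memory regardless of host byte
 * order. */
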
#define Sigma0(x) (ROTR((x), 28) ^ ROTR((x), 34) ^ ROTR((x), 39))
#define Sigma1(x) (ROTR((x), 14) ^ ROTR((x), 18) ^ ROTR((x), 41))
#define sigma0(x) (ROTR((x), 1) ^ ROTR((x), 8) ^ ((x) >> 7))
#define sigma1(x) (ROTR((x), 19) ^ ROTR((x), 61) ^ ((x) >> 6))

#define Ch(x, y, z) (((x) & (y)) ^ ((~(x)) & (z)))
#define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))

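/* These are the SHA-512 logical functions from FIPS 180-4, section 4.1.3:
 * Sigma0/Sigma1 mix the working variables, sigma0/sigma1 expand the message
 * schedule, Ch selects bits from y or z according to x, and Maj is the
 * bitwise majority of x, y and z. */
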
#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
/*
 * This code should give better results on 32-bit CPUs with fewer than
 * ~24 registers, both size- and performance-wise...
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num) {
  const uint64_t *W = in;
  uint64_t A, E, T;
  uint64_t X[9 + 80], *F;
  int i;

  while (num--) {
    F = X + 80;
    A = ctx->h[0];
    F[1] = ctx->h[1];
    F[2] = ctx->h[2];
    F[3] = ctx->h[3];
    E = ctx->h[4];
    F[5] = ctx->h[5];
    F[6] = ctx->h[6];
    F[7] = ctx->h[7];

    for (i = 0; i < 16; i++, F--) {
      T = PULL64(W[i]);
      F[0] = A;
      F[4] = E;
      F[8] = T;
      T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
      E = F[3] + T;
      A = T + Sigma0(A) + Maj(A, F[1], F[2]);
    }

    for (; i < 80; i++, F--) {
      T = sigma0(F[8 + 16 - 1]);
      T += sigma1(F[8 + 16 - 14]);
      T += F[8 + 16] + F[8 + 16 - 9];
      F[0] = A;
      F[4] = E;
      F[8] = T;
      T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
      E = F[3] + T;
      A = T + Sigma0(A) + Maj(A, F[1], F[2]);
    }

    ctx->h[0] += A;
    ctx->h[1] += F[1];
    ctx->h[2] += F[2];
    ctx->h[3] += F[3];
    ctx->h[4] += E;
    ctx->h[5] += F[5];
    ctx->h[6] += F[6];
    ctx->h[7] += F[7];

    W += SHA512_CBLOCK / sizeof(W[0]);
  }
}

#else

#define ROUND_00_15(i, a, b, c, d, e, f, g, h)   \
  do {                                           \
    T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i]; \
    h = Sigma0(a) + Maj(a, b, c);                \
    d += T1;                                     \
    h += T1;                                     \
  } while (0)

#define ROUND_16_80(i, j, a, b, c, d, e, f, g, h, X)   \
  do {                                                 \
    s0 = X[(j + 1) & 0x0f];                            \
    s0 = sigma0(s0);                                   \
    s1 = X[(j + 14) & 0x0f];                           \
    s1 = sigma1(s1);                                   \
    T1 = X[(j) & 0x0f] += s0 + s1 + X[(j + 9) & 0x0f]; \
    ROUND_00_15(i + j, a, b, c, d, e, f, g, h);        \
  } while (0)

static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num) {
  const uint64_t *W = in;
  uint64_t a, b, c, d, e, f, g, h, s0, s1, T1;
  uint64_t X[16];
  int i;

  while (num--) {
    a = ctx->h[0];
    b = ctx->h[1];
    c = ctx->h[2];
    d = ctx->h[3];
    e = ctx->h[4];
    f = ctx->h[5];
    g = ctx->h[6];
    h = ctx->h[7];

    T1 = X[0] = PULL64(W[0]);
    ROUND_00_15(0, a, b, c, d, e, f, g, h);
    T1 = X[1] = PULL64(W[1]);
    ROUND_00_15(1, h, a, b, c, d, e, f, g);
    T1 = X[2] = PULL64(W[2]);
    ROUND_00_15(2, g, h, a, b, c, d, e, f);
    T1 = X[3] = PULL64(W[3]);
    ROUND_00_15(3, f, g, h, a, b, c, d, e);
    T1 = X[4] = PULL64(W[4]);
    ROUND_00_15(4, e, f, g, h, a, b, c, d);
    T1 = X[5] = PULL64(W[5]);
    ROUND_00_15(5, d, e, f, g, h, a, b, c);
    T1 = X[6] = PULL64(W[6]);
    ROUND_00_15(6, c, d, e, f, g, h, a, b);
    T1 = X[7] = PULL64(W[7]);
    ROUND_00_15(7, b, c, d, e, f, g, h, a);
    T1 = X[8] = PULL64(W[8]);
    ROUND_00_15(8, a, b, c, d, e, f, g, h);
    T1 = X[9] = PULL64(W[9]);
    ROUND_00_15(9, h, a, b, c, d, e, f, g);
    T1 = X[10] = PULL64(W[10]);
    ROUND_00_15(10, g, h, a, b, c, d, e, f);
    T1 = X[11] = PULL64(W[11]);
    ROUND_00_15(11, f, g, h, a, b, c, d, e);
    T1 = X[12] = PULL64(W[12]);
    ROUND_00_15(12, e, f, g, h, a, b, c, d);
    T1 = X[13] = PULL64(W[13]);
    ROUND_00_15(13, d, e, f, g, h, a, b, c);
    T1 = X[14] = PULL64(W[14]);
    ROUND_00_15(14, c, d, e, f, g, h, a, b);
    T1 = X[15] = PULL64(W[15]);
    ROUND_00_15(15, b, c, d, e, f, g, h, a);

    for (i = 16; i < 80; i += 16) {
      ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
      ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
      ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
      ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
      ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
      ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
      ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
      ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
      ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
      ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
      ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
      ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
      ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
      ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
      ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
      ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
    }

    ctx->h[0] += a;
    ctx->h[1] += b;
    ctx->h[2] += c;
    ctx->h[3] += d;
    ctx->h[4] += e;
    ctx->h[5] += f;
    ctx->h[6] += g;
    ctx->h[7] += h;

    W += SHA512_CBLOCK / sizeof(W[0]);
  }
}

#endif

#endif /* SHA512_ASM */