/* ====================================================================
 * Copyright (c) 2008 The OpenSSL Project.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    openssl-core@openssl.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ==================================================================== */

#include <openssl/modes.h>

#include <assert.h>
#include <string.h>

#include <openssl/mem.h>
#include <openssl/cpu.h>

#include "internal.h"
#include "../internal.h"

#if !defined(OPENSSL_NO_ASM) && \
    (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || defined(OPENSSL_ARM))
#define GHASH_ASM
#endif

#if defined(BSWAP4) && STRICT_ALIGNMENT == 1
/* redefine, because alignment is ensured */
#undef GETU32
#define GETU32(p) BSWAP4(*(const uint32_t *)(p))
#undef PUTU32
#define PUTU32(p, v) *(uint32_t *)(p) = BSWAP4(v)
#endif

#define PACK(s) ((size_t)(s) << (sizeof(size_t) * 8 - 16))
#define REDUCE1BIT(V)                                                  \
  do {                                                                 \
    if (sizeof(size_t) == 8) {                                         \
      uint64_t T = OPENSSL_U64(0xe100000000000000) & (0 - (V.lo & 1)); \
      V.lo = (V.hi << 63) | (V.lo >> 1);                               \
      V.hi = (V.hi >> 1) ^ T;                                          \
    } else {                                                           \
      uint32_t T = 0xe1000000U & (0 - (uint32_t)(V.lo & 1));           \
      V.lo = (V.hi << 63) | (V.lo >> 1);                               \
      V.hi = (V.hi >> 1) ^ ((uint64_t)T << 32);                        \
    }                                                                  \
  } while (0)
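
/* Illustrative sketch (not from the original source): REDUCE1BIT divides an
 * element of GF(2^128) by x, folding the dropped low bit back in with the
 * constant 0xe1...0, i.e. the GCM polynomial x^128 + x^7 + x^2 + x + 1 in
 * bit-reflected form. Starting from the element {hi = 0, lo = 1}, both the
 * 64-bit and 32-bit branches yield {hi = 0xe100000000000000, lo = 0}: */
#if 0
static void reduce1bit_example(void) {
  u128 V;
  V.hi = 0;
  V.lo = 1;
  REDUCE1BIT(V); /* low bit was set, so the constant is XORed into hi */
  assert(V.hi == OPENSSL_U64(0xe100000000000000));
  assert(V.lo == 0);
}
#endif
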
static void gcm_init_4bit(u128 Htable[16], uint64_t H[2]) {
  u128 V;

  Htable[0].hi = 0;
  Htable[0].lo = 0;
  V.hi = H[0];
  V.lo = H[1];

  Htable[8] = V;
  REDUCE1BIT(V);
  Htable[4] = V;
  REDUCE1BIT(V);
  Htable[2] = V;
  REDUCE1BIT(V);
  Htable[1] = V;
  Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
  V = Htable[4];
  Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
  Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
  Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
  V = Htable[8];
  Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
  Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
  Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
  Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
  Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
  Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
  Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;

#if defined(GHASH_ASM) && defined(OPENSSL_ARM)
  /* ARM assembler expects specific dword order in Htable. */
  {
    int j;
    const union {
      long one;
      char little;
    } is_endian = {1};

    if (is_endian.little) {
      for (j = 0; j < 16; ++j) {
        V = Htable[j];
        Htable[j].hi = V.lo;
        Htable[j].lo = V.hi;
      }
    } else {
      for (j = 0; j < 16; ++j) {
        V = Htable[j];
        Htable[j].hi = V.lo << 32 | V.lo >> 32;
        Htable[j].lo = V.hi << 32 | V.hi >> 32;
      }
    }
  }
#endif
}
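
/* Illustrative sketch (not in the original): gcm_init_4bit builds
 * Htable[i] = i*H over GF(2^128) by repeatedly halving H (REDUCE1BIT, hence
 * the 8, 4, 2, 1 order) and XORing, so the rows obey the field's linearity.
 * A hypothetical self-check: */
#if 0
static void htable_linearity_example(const u128 Htable[16]) {
  /* 5 = 4 ^ 1, so row 5 must equal row 4 XOR row 1; this still holds after
   * the ARM dword reordering above, which transforms all rows uniformly. */
  assert(Htable[5].hi == (Htable[4].hi ^ Htable[1].hi));
  assert(Htable[5].lo == (Htable[4].lo ^ Htable[1].lo));
}
#endif
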
#if !defined(GHASH_ASM)
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)};
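
/* Illustrative sketch (not in the original): rem_4bit[b] is the reduction
 * owed for the 4-bit remainder b shifted out by one nibble step. Entry 8
 * (a single dropped bit) is 0xE1 = 11100001, the same polynomial as
 * REDUCE1BIT's constant; entries 4, 2 and 1 are its successive right
 * shifts, and the remaining entries follow by linearity: */
#if 0
static void rem_4bit_example(void) {
  assert(rem_4bit[8] == PACK(0xE100));
  assert(rem_4bit[4] == PACK(0x7080)); /* 0xE100 >> 1 */
  assert(rem_4bit[3] == (rem_4bit[1] ^ rem_4bit[2]));
}
#endif
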
static void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]) {
  u128 Z;
  int cnt = 15;
  size_t rem, nlo, nhi;
  const union {
    long one;
    char little;
  } is_endian = {1};

  nlo = ((const uint8_t *)Xi)[15];
  nhi = nlo >> 4;
  nlo &= 0xf;

  Z.hi = Htable[nlo].hi;
  Z.lo = Htable[nlo].lo;

  while (1) {
    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nhi].hi;
    Z.lo ^= Htable[nhi].lo;

    if (--cnt < 0) {
      break;
    }

    nlo = ((const uint8_t *)Xi)[cnt];
    nhi = nlo >> 4;
    nlo &= 0xf;

    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nlo].hi;
    Z.lo ^= Htable[nlo].lo;
  }

  if (is_endian.little) {
#ifdef BSWAP8
    Xi[0] = BSWAP8(Z.hi);
    Xi[1] = BSWAP8(Z.lo);
#else
    uint8_t *p = (uint8_t *)Xi;
    uint32_t v;
    v = (uint32_t)(Z.hi >> 32);
    PUTU32(p, v);
    v = (uint32_t)(Z.hi);
    PUTU32(p + 4, v);
    v = (uint32_t)(Z.lo >> 32);
    PUTU32(p + 8, v);
    v = (uint32_t)(Z.lo);
    PUTU32(p + 12, v);
#endif
  } else {
    Xi[0] = Z.hi;
    Xi[1] = Z.lo;
  }
}

/* Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
 * details... Compiler-generated code doesn't seem to give any
 * performance improvement, at least not on x86[_64]. It's here
 * mostly as reference and a placeholder for possible future
 * non-trivial optimization[s]... */
static void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16],
                           const uint8_t *inp, size_t len) {
  u128 Z;
  int cnt;
  size_t rem, nlo, nhi;
  const union {
    long one;
    char little;
  } is_endian = {1};

  do {
    cnt = 15;
    nlo = ((const uint8_t *)Xi)[15];
    nlo ^= inp[15];
    nhi = nlo >> 4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nhi].hi;
      Z.lo ^= Htable[nhi].lo;

      if (--cnt < 0) {
        break;
      }

      nlo = ((const uint8_t *)Xi)[cnt];
      nlo ^= inp[cnt];
      nhi = nlo >> 4;
      nlo &= 0xf;

      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nlo].hi;
      Z.lo ^= Htable[nlo].lo;
    }

    if (is_endian.little) {
#ifdef BSWAP8
      Xi[0] = BSWAP8(Z.hi);
      Xi[1] = BSWAP8(Z.lo);
#else
      uint8_t *p = (uint8_t *)Xi;
      uint32_t v;
      v = (uint32_t)(Z.hi >> 32);
      PUTU32(p, v);
      v = (uint32_t)(Z.hi);
      PUTU32(p + 4, v);
      v = (uint32_t)(Z.lo >> 32);
      PUTU32(p + 8, v);
      v = (uint32_t)(Z.lo);
      PUTU32(p + 12, v);
#endif
    } else {
      Xi[0] = Z.hi;
      Xi[1] = Z.lo;
    }
  } while (inp += 16, len -= 16);
}
#else /* GHASH_ASM */
void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len);
#endif

#define GCM_MUL(ctx, Xi) gcm_gmult_4bit(ctx->Xi.u, ctx->Htable)
#if defined(GHASH_ASM)
#define GHASH(ctx, in, len) gcm_ghash_4bit((ctx)->Xi.u, (ctx)->Htable, in, len)
/* GHASH_CHUNK is a "stride parameter" meant to mitigate cache-thrashing
 * effects. In other words, the idea is to hash data while it's still in
 * L1 cache after the encryption pass... */
#define GHASH_CHUNK (3 * 1024)
#endif
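
/* Illustrative sketch (not in the original): a GHASH call over n complete
 * blocks is just n applications of GCM_MUL, i.e. "XOR in a block, multiply
 * by H". The chunked GHASH below only batches this fold for cache locality.
 * Equivalent scalar version, assuming len is a multiple of 16: */
#if 0
static void ghash_by_gmult(GCM128_CONTEXT *ctx, const uint8_t *inp,
                           size_t len) {
  size_t i, n;
  for (n = 0; n < len; n += 16) {
    for (i = 0; i < 16; ++i) {
      ctx->Xi.c[i] ^= inp[n + i];
    }
    ctx->gmult(ctx->Xi.u, ctx->Htable); /* Xi *= H */
  }
}
#endif
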
#if defined(GHASH_ASM)
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
#define GHASH_ASM_X86_OR_64
#define GCM_FUNCREF_4BIT
void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                     size_t len);

#if defined(OPENSSL_X86)
#define gcm_init_avx gcm_init_clmul
#define gcm_gmult_avx gcm_gmult_clmul
#define gcm_ghash_avx gcm_ghash_clmul
#else
void gcm_init_avx(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_avx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                   size_t len);
#endif

#if defined(OPENSSL_X86)
#define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(uint64_t Xi[2], const u128 Htable[16],
                        const uint8_t *inp, size_t len);

void gcm_gmult_4bit_x86(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(uint64_t Xi[2], const u128 Htable[16],
                        const uint8_t *inp, size_t len);
#endif
#elif defined(OPENSSL_ARM)
#include "../arm_arch.h"
#if __ARM_ARCH__ >= 7
#define GHASH_ASM_ARM
#define GCM_FUNCREF_4BIT
void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len);
#endif
#endif

#ifdef GCM_FUNCREF_4BIT
#undef GCM_MUL
#define GCM_MUL(ctx, Xi) (*gcm_gmult_p)(ctx->Xi.u, ctx->Htable)
#ifdef GHASH
#undef GHASH
#define GHASH(ctx, in, len) (*gcm_ghash_p)(ctx->Xi.u, ctx->Htable, in, len)
#endif
#endif

GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block) {
  GCM128_CONTEXT *ret;

  ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT));
  if (ret != NULL) {
    CRYPTO_gcm128_init(ret, key, block);
  }

  return ret;
}

void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block) {
  const union {
    long one;
    char little;
  } is_endian = {1};

  memset(ctx, 0, sizeof(*ctx));
  ctx->block = block;
  ctx->key = key;

  (*block)(ctx->H.c, ctx->H.c, key);

  if (is_endian.little) {
    /* H is stored in host byte order */
#ifdef BSWAP8
    ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
    ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
    uint8_t *p = ctx->H.c;
    uint64_t hi, lo;
    hi = (uint64_t)GETU32(p) << 32 | GETU32(p + 4);
    lo = (uint64_t)GETU32(p + 8) << 32 | GETU32(p + 12);
    ctx->H.u[0] = hi;
    ctx->H.u[1] = lo;
#endif
  }

#if defined(GHASH_ASM_X86_OR_64)
  if (crypto_gcm_clmul_enabled()) {
    if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
      gcm_init_avx(ctx->Htable, ctx->H.u);
      ctx->gmult = gcm_gmult_avx;
      ctx->ghash = gcm_ghash_avx;
    } else {
      gcm_init_clmul(ctx->Htable, ctx->H.u);
      ctx->gmult = gcm_gmult_clmul;
      ctx->ghash = gcm_ghash_clmul;
    }
    return;
  }
  gcm_init_4bit(ctx->Htable, ctx->H.u);
#if defined(GHASH_ASM_X86) /* x86 only */
  if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
    ctx->gmult = gcm_gmult_4bit_mmx;
    ctx->ghash = gcm_ghash_4bit_mmx;
  } else {
    ctx->gmult = gcm_gmult_4bit_x86;
    ctx->ghash = gcm_ghash_4bit_x86;
  }
#else
  ctx->gmult = gcm_gmult_4bit;
  ctx->ghash = gcm_ghash_4bit;
#endif
#elif defined(GHASH_ASM_ARM)
  if (CRYPTO_is_NEON_capable()) {
    gcm_init_neon(ctx->Htable, ctx->H.u);
    ctx->gmult = gcm_gmult_neon;
    ctx->ghash = gcm_ghash_neon;
  } else {
    gcm_init_4bit(ctx->Htable, ctx->H.u);
    ctx->gmult = gcm_gmult_4bit;
    ctx->ghash = gcm_ghash_4bit;
  }
#else
  ctx->gmult = gcm_gmult_4bit;
  ctx->ghash = gcm_ghash_4bit;
  gcm_init_4bit(ctx->Htable, ctx->H.u);
#endif
}
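
/* Hypothetical usage sketch (not part of this file): the intended call
 * sequence for this streaming API, with aes_key, aes_encrypt_block, iv,
 * aad and the buffers standing in for caller-provided values: */
#if 0
static int gcm_seal_example(void) {
  GCM128_CONTEXT ctx;
  uint8_t tag[16];
  CRYPTO_gcm128_init(&ctx, aes_key, aes_encrypt_block);
  CRYPTO_gcm128_setiv(&ctx, iv, 12);
  if (!CRYPTO_gcm128_aad(&ctx, aad, aad_len) ||
      !CRYPTO_gcm128_encrypt(&ctx, plaintext, ciphertext, plaintext_len)) {
    return 0; /* length overflow, or AAD supplied after data */
  }
  CRYPTO_gcm128_tag(&ctx, tag, sizeof(tag));
  return 1;
}
#endif
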
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const uint8_t *iv, size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

  ctx->Yi.u[0] = 0;
  ctx->Yi.u[1] = 0;
  ctx->Xi.u[0] = 0;
  ctx->Xi.u[1] = 0;
  ctx->len.u[0] = 0; /* AAD length */
  ctx->len.u[1] = 0; /* message length */
  ctx->ares = 0;
  ctx->mres = 0;

  if (len == 12) {
    memcpy(ctx->Yi.c, iv, 12);
    ctx->Yi.c[15] = 1;
    ctr = 1;
  } else {
    size_t i;
    uint64_t len0 = len;

    while (len >= 16) {
      for (i = 0; i < 16; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
      iv += 16;
      len -= 16;
    }
    if (len) {
      for (i = 0; i < len; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
    }
    len0 <<= 3;
    if (is_endian.little) {
#ifdef BSWAP8
      ctx->Yi.u[1] ^= BSWAP8(len0);
#else
      ctx->Yi.c[8] ^= (uint8_t)(len0 >> 56);
      ctx->Yi.c[9] ^= (uint8_t)(len0 >> 48);
      ctx->Yi.c[10] ^= (uint8_t)(len0 >> 40);
      ctx->Yi.c[11] ^= (uint8_t)(len0 >> 32);
      ctx->Yi.c[12] ^= (uint8_t)(len0 >> 24);
      ctx->Yi.c[13] ^= (uint8_t)(len0 >> 16);
      ctx->Yi.c[14] ^= (uint8_t)(len0 >> 8);
      ctx->Yi.c[15] ^= (uint8_t)(len0);
#endif
    } else {
      ctx->Yi.u[1] ^= len0;
    }

    GCM_MUL(ctx, Yi);

    if (is_endian.little) {
      ctr = GETU32(ctx->Yi.c + 12);
    } else {
      ctr = ctx->Yi.d[3];
    }
  }

  (*ctx->block)(ctx->Yi.c, ctx->EK0.c, ctx->key);
  ++ctr;
  if (is_endian.little) {
    PUTU32(ctx->Yi.c + 12, ctr);
  } else {
    ctx->Yi.d[3] = ctr;
  }
}
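
/* Illustrative sketch (not in the original): for the common 96-bit IV no
 * GHASH pass is needed; Y0 is IV || 0x00000001, counter value 1 is consumed
 * by the EK0 block above, and the stored counter has moved on to 2: */
#if 0
CRYPTO_gcm128_setiv(ctx, iv, 12);
assert(memcmp(ctx->Yi.c, iv, 12) == 0);
assert(GETU32(ctx->Yi.c + 12) == 2);
#endif
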
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const uint8_t *aad, size_t len) {
  size_t i;
  unsigned int n;
  uint64_t alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->ghash;
#endif
#endif

  if (ctx->len.u[1]) {
    return 0;
  }

  alen += len;
  if (alen > (OPENSSL_U64(1) << 61) || (sizeof(len) == 8 && alen < len)) {
    return 0;
  }
  ctx->len.u[0] = alen;

  n = ctx->ares;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(aad++);
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->ares = n;
      return 1;
    }
  }

#ifdef GHASH
  if ((i = (len & (size_t) - 16))) {
    GHASH(ctx, aad, i);
    aad += i;
    len -= i;
  }
#else
  while (len >= 16) {
    for (i = 0; i < 16; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
    GCM_MUL(ctx, Xi);
    aad += 16;
    len -= 16;
  }
#endif
  if (len) {
    n = (unsigned int)len;
    for (i = 0; i < len; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
  }

  ctx->ares = n;
  return 1;
}

int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const unsigned char *in,
                          unsigned char *out, size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  size_t i;
  uint64_t mlen = ctx->len.u[1];
  block128_f block = ctx->block;
  void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((OPENSSL_U64(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to encrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }
  if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out) % sizeof(size_t) != 0) {
    for (i = 0; i < len; ++i) {
      if (n == 0) {
        (*block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (is_endian.little) {
          PUTU32(ctx->Yi.c + 12, ctr);
        } else {
          ctx->Yi.d[3] = ctr;
        }
      }
      ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
      n = (n + 1) % 16;
      if (n == 0) {
        GCM_MUL(ctx, Xi);
      }
    }

    ctx->mres = n;
    return 1;
  }
#if defined(GHASH) && defined(GHASH_CHUNK)
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    while (j) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
    len -= GHASH_CHUNK;
  }
  if ((i = (len & (size_t) - 16))) {
    size_t j = i;

    while (len >= 16) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      len -= 16;
    }
    GHASH(ctx, out - j, j);
  }
#else
  while (len >= 16) {
    size_t *out_t = (size_t *)out;
    const size_t *in_t = (const size_t *)in;

    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    for (i = 0; i < 16 / sizeof(size_t); ++i) {
      ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
    }
    GCM_MUL(ctx, Xi);
    out += 16;
    in += 16;
    len -= 16;
  }
#endif
  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}
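
/* Illustrative sketch (not in the original): every path above performs the
 * same per-block step, differing only in batching and alignment handling: */
#if 0
(*block)(ctx->Yi.c, ctx->EKi.c, key); /* EKi = E_K(Yi); then bump counter */
for (i = 0; i < 16; ++i) {
  out[i] = in[i] ^ ctx->EKi.c[i]; /* CTR encryption */
  ctx->Xi.c[i] ^= out[i];         /* GHASH absorbs the ciphertext */
}
GCM_MUL(ctx, Xi); /* Xi *= H, once per block */
#endif
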
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const unsigned char *in,
                          unsigned char *out, size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  size_t i;
  uint64_t mlen = ctx->len.u[1];
  block128_f block = ctx->block;
  void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((OPENSSL_U64(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to decrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }
  if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out) % sizeof(size_t) != 0) {
    for (i = 0; i < len; ++i) {
      uint8_t c;
      if (n == 0) {
        (*block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (is_endian.little) {
          PUTU32(ctx->Yi.c + 12, ctr);
        } else {
          ctx->Yi.d[3] = ctr;
        }
      }
      c = in[i];
      out[i] = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      n = (n + 1) % 16;
      if (n == 0) {
        GCM_MUL(ctx, Xi);
      }
    }

    ctx->mres = n;
    return 1;
  }
#if defined(GHASH) && defined(GHASH_CHUNK)
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    GHASH(ctx, in, GHASH_CHUNK);
    while (j) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    len -= GHASH_CHUNK;
  }
  if ((i = (len & (size_t) - 16))) {
    GHASH(ctx, in, i);
    while (len >= 16) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      len -= 16;
    }
  }
#else
  while (len >= 16) {
    size_t *out_t = (size_t *)out;
    const size_t *in_t = (const size_t *)in;

    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    for (i = 0; i < 16 / sizeof(size_t); ++i) {
      size_t c = in_t[i];
      out_t[i] = c ^ ctx->EKi.t[i];
      ctx->Xi.t[i] ^= c;
    }
    GCM_MUL(ctx, Xi);
    out += 16;
    in += 16;
    len -= 16;
  }
#endif
  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const uint8_t *in,
                                uint8_t *out, size_t len, ctr128_f stream) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  size_t i;
  uint64_t mlen = ctx->len.u[1];
  void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((OPENSSL_U64(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to encrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }
#if defined(GHASH)
  while (len >= GHASH_CHUNK) {
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    GHASH(ctx, out, GHASH_CHUNK);
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  if ((i = (len & (size_t) - 16))) {
    size_t j = i / 16;

    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    in += i;
    len -= i;
#if defined(GHASH)
    GHASH(ctx, out, i);
    out += i;
#else
    while (j--) {
      for (i = 0; i < 16; ++i) {
        ctx->Xi.c[i] ^= out[i];
      }
      GCM_MUL(ctx, Xi);
      out += 16;
    }
#endif
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, const uint8_t *in,
                                uint8_t *out, size_t len,
                                ctr128_f stream) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  size_t i;
  uint64_t mlen = ctx->len.u[1];
  void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((OPENSSL_U64(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to decrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }
#if defined(GHASH)
  while (len >= GHASH_CHUNK) {
    GHASH(ctx, in, GHASH_CHUNK);
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    if (is_endian.little)
      PUTU32(ctx->Yi.c + 12, ctr);
    else
      ctx->Yi.d[3] = ctr;
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  if ((i = (len & (size_t) - 16))) {
    size_t j = i / 16;

#if defined(GHASH)
    GHASH(ctx, in, i);
#else
    while (j--) {
      size_t k;
      for (k = 0; k < 16; ++k)
        ctx->Xi.c[k] ^= in[k];
      GCM_MUL(ctx, Xi);
      in += 16;
    }
    j = i / 16;
    in -= i;
#endif
    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    if (is_endian.little)
      PUTU32(ctx->Yi.c + 12, ctr);
    else
      ctx->Yi.d[3] = ctr;
    out += i;
    in += i;
    len -= i;
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little)
      PUTU32(ctx->Yi.c + 12, ctr);
    else
      ctx->Yi.d[3] = ctr;
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}
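
/* Illustrative sketch (not in the original): the ctr128_f |stream| used by
 * the _ctr32 functions encrypts |blocks| consecutive counter blocks, taking
 * the initial counter block from ivec and treating only its last 32 bits as
 * the counter; the callers above account for the consumed counters
 * themselves. A hypothetical scalar fallback over some fixed block128_f
 * |demo_block|: */
#if 0
static block128_f demo_block;

static void demo_ctr128(const uint8_t *in, uint8_t *out, size_t blocks,
                        const void *key, const uint8_t ivec[16]) {
  uint8_t Yi[16], EKi[16];
  uint32_t ctr;
  size_t i;

  memcpy(Yi, ivec, 16);
  ctr = GETU32(Yi + 12);
  while (blocks--) {
    (*demo_block)(Yi, EKi, key);
    for (i = 0; i < 16; ++i) {
      *(out++) = *(in++) ^ EKi[i];
    }
    PUTU32(Yi + 12, ++ctr); /* 32-bit counter, as in the loops above */
  }
}
#endif
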
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const uint8_t *tag, size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  uint64_t alen = ctx->len.u[0] << 3;
  uint64_t clen = ctx->len.u[1] << 3;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

  if (ctx->mres || ctx->ares) {
    GCM_MUL(ctx, Xi);
  }

  if (is_endian.little) {
#ifdef BSWAP8
    alen = BSWAP8(alen);
    clen = BSWAP8(clen);
#else
    uint8_t *p = ctx->len.c;

    ctx->len.u[0] = alen;
    ctx->len.u[1] = clen;

    alen = (uint64_t)GETU32(p) << 32 | GETU32(p + 4);
    clen = (uint64_t)GETU32(p + 8) << 32 | GETU32(p + 12);
#endif
  }

  ctx->Xi.u[0] ^= alen;
  ctx->Xi.u[1] ^= clen;
  GCM_MUL(ctx, Xi);

  ctx->Xi.u[0] ^= ctx->EK0.u[0];
  ctx->Xi.u[1] ^= ctx->EK0.u[1];

  if (tag && len <= sizeof(ctx->Xi)) {
    return CRYPTO_memcmp(ctx->Xi.c, tag, len) == 0;
  } else {
    return 0;
  }
}
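
/* Illustrative sketch (not in the original): the tag is
 * E_K(Y0) ^ GHASH(AAD || C || bitlen(AAD) || bitlen(C)); the code above
 * folds the two bit lengths into Xi, multiplies by H once more, then XORs
 * in EK0. A hypothetical caller verifying a full 16-byte expected_tag: */
#if 0
if (!CRYPTO_gcm128_finish(ctx, expected_tag, 16)) {
  return 0; /* authentication failed: reject the message and any output */
}
#endif
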
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) {
  CRYPTO_gcm128_finish(ctx, NULL, 0);
  memcpy(tag, ctx->Xi.c, len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
}

void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx) {
  if (ctx) {
    OPENSSL_cleanse(ctx, sizeof(*ctx));
    OPENSSL_free(ctx);
  }
}

#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
int crypto_gcm_clmul_enabled(void) {
#ifdef GHASH_ASM
  return OPENSSL_ia32cap_P[0] & (1 << 24) && /* check FXSR bit */
         OPENSSL_ia32cap_P[1] & (1 << 1);    /* check PCLMULQDQ bit */
#else
  return 0;
#endif
}
#endif