3 * Measures the performance of the various algorithms.
7 /* nettle, low-level cryptographics library
9 * Copyright (C) 2001, 2010 Niels Möller
11 * The nettle library is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser General Public License as published by
13 * the Free Software Foundation; either version 2.1 of the License, or (at your
14 * option) any later version.
16 * The nettle library is distributed in the hope that it will be useful, but
17 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
19 * License for more details.
21 * You should have received a copy of the GNU Lesser General Public License
22 * along with the nettle library; see the file COPYING.LIB. If not, write to
23 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
60 #include "nettle-meta.h"
61 #include "nettle-internal.h"
/* CPU clock frequency in cycles per second, set from the -f option.
   The cycle-based output columns are only produced when it is positive. */
65 static double frequency = 0.0;
67 /* Process BENCH_BLOCK bytes at a time, for BENCH_INTERVAL seconds. */
68 #define BENCH_BLOCK 10240
69 #define BENCH_INTERVAL 0.1
71 /* FIXME: Proper configure test for rdtsc? */
/* Unless the build already defines WITH_CYCLE_COUNTER, it is set to 1 for
   GCC-compatible compilers targeting i386 or x86_64; the definition as 0
   is presumably the fallback for every other configuration. */
72 #ifndef WITH_CYCLE_COUNTER
73 # if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
74 # define WITH_CYCLE_COUNTER 1
76 # define WITH_CYCLE_COUNTER 0
80 #if WITH_CYCLE_COUNTER
81 # if defined(__i386__)
/* GET_CYCLE_COUNTER(hi, lo), i386 variant: the asm result in eax is stored
   in LO and the result in edx in HI. ebx is copied to edi before the
   sequence and copied back afterwards, so edi is listed as clobbered. */
82 #define GET_CYCLE_COUNTER(hi, lo) \
83 __asm__("xorl %%eax,%%eax\n" \
84 "movl %%ebx, %%edi\n" \
87 "movl %%edi, %%ebx\n" \
88 : "=a" (lo), "=d" (hi) \
90 : "%edi", "%ecx", "cc")
91 # elif defined(__x86_64__)
/* x86_64 variant: same outputs (eax -> LO, edx -> HI); rbx is preserved
   through r10 instead of edi. */
92 #define GET_CYCLE_COUNTER(hi, lo) \
93 __asm__("xorl %%eax,%%eax\n" \
94 "mov %%rbx, %%r10\n" \
97 "mov %%r10, %%rbx\n" \
98 : "=a" (lo), "=d" (hi) \
100 : "%r10", "%rcx", "cc")
/* Number of back-to-back executions timed per TIME_CYCLES sample. */
102 #define BENCH_ITERATIONS 10
/* Write a printf-style formatted message to stderr. The function is
   declared NORETURN, so callers do not expect control to come back. */
105 static void NORETURN PRINTF_STYLE(1,2)
106 die(const char *format, ...)
109 va_start(args, format);
110 vfprintf(stderr, format, args);
/* Cost in seconds of one call through the timing harness; assigned from
   a time_function(bench_nothing, NULL) measurement and subtracted from
   every time_function result. */
116 static double overhead = 0.0;
118 /* Returns seconds per function call, with the harness overhead
   subtracted. F(ARG) is called NCALLS times per measurement; the
   elapsed time is then compared against BENCH_INTERVAL and
   BENCH_INTERVAL / 10. */
120 time_function(void (*f)(void *arg), void *arg)
130 for (i = 0; i < ncalls; i++)
132 elapsed = time_end();
133 if (elapsed > BENCH_INTERVAL)
135 else if (elapsed < BENCH_INTERVAL / 10)
140 return elapsed / ncalls - overhead;
/* Callback that ignores its argument; it is passed to time_function on
   its own to measure the overhead of the timing harness. */
144 bench_nothing(void *arg UNUSED)
/* Arguments for the memxor benchmarks. bench_memxor reads the dst and src
   members; bench_memxor3 additionally reads other. */
149 struct bench_memxor_info
153 const uint8_t *other;
/* time_function callback: one memxor call over BENCH_BLOCK bytes, using
   the dst and src pointers of the bench_memxor_info passed as ARG. */
157 bench_memxor(void *arg)
159 struct bench_memxor_info *info = arg;
160 memxor (info->dst, info->src, BENCH_BLOCK);
/* time_function callback: one memxor3 call over BENCH_BLOCK bytes, using
   the dst, src and other pointers of the bench_memxor_info passed as ARG. */
164 bench_memxor3(void *arg)
166 struct bench_memxor_info *info = arg;
167 memxor3 (info->dst, info->src, info->other, BENCH_BLOCK);
/* Arguments for bench_hash, which calls update(ctx, BENCH_BLOCK, data)
   using the members of this struct. */
170 struct bench_hash_info
173 nettle_hash_update_func *update;
/* time_function callback: feed one BENCH_BLOCK-byte buffer to the update
   function stored in the bench_hash_info passed as ARG. */
178 bench_hash(void *arg)
180 struct bench_hash_info *info = arg;
181 info->update(info->ctx, BENCH_BLOCK, info->data);
/* Arguments for bench_cipher, which calls crypt(ctx, BENCH_BLOCK, data,
   data) using the members of this struct. */
184 struct bench_cipher_info
187 nettle_crypt_func *crypt;
/* time_function callback: run the crypt function over BENCH_BLOCK bytes,
   passing the same buffer as both destination and source. */
192 bench_cipher(void *arg)
194 struct bench_cipher_info *info = arg;
195 info->crypt(info->ctx, BENCH_BLOCK, info->data, info->data);
/* Arguments shared by the CBC and CTR callbacks, which read the ctx,
   crypt, block_size, iv and data members. */
198 struct bench_cbc_info
201 nettle_crypt_func *crypt;
/* time_function callback: one cbc_encrypt call over BENCH_BLOCK bytes,
   with the data buffer used as both destination and source. */
210 bench_cbc_encrypt(void *arg)
212 struct bench_cbc_info *info = arg;
213 cbc_encrypt(info->ctx, info->crypt,
214 info->block_size, info->iv,
215 BENCH_BLOCK, info->data, info->data);
/* time_function callback: one cbc_decrypt call over BENCH_BLOCK bytes,
   with the data buffer used as both destination and source. */
219 bench_cbc_decrypt(void *arg)
221 struct bench_cbc_info *info = arg;
222 cbc_decrypt(info->ctx, info->crypt,
223 info->block_size, info->iv,
224 BENCH_BLOCK, info->data, info->data);
/* NOTE(review): the function header is not visible here; presumably this
   is the body of bench_ctr, the callback time_cipher passes for CTR mode.
   It makes one ctr_crypt call over BENCH_BLOCK bytes, with the data buffer
   used as both destination and source. */
230 struct bench_cbc_info *info = arg;
231 ctr_crypt(info->ctx, info->crypt,
232 info->block_size, info->iv,
233 BENCH_BLOCK, info->data, info->data);
236 /* Set data[i] = floor(sqrt(i)) for every index i below BENCH_BLOCK. */
238 init_data(uint8_t *data)
241 for (i = j = 0; i<BENCH_BLOCK; i++)
/* Initialize a key of LENGTH elements, one loop iteration per index.
   NOTE(review): the values written are not visible in this view. */
250 init_key(unsigned length,
254 for (i = 0; i<length; i++)
/* Print the column header line. The " cycles/byte cycles/block" columns
   are included only when a positive clock frequency has been set. */
261 printf("%18s %11s Mbyte/s%s\n",
263 frequency > 0.0 ? " cycles/byte cycles/block" : "");
/* Print one result row for algorithm NAME in mode MODE. TIME is the cost
   in seconds of processing BENCH_BLOCK bytes; it is reported as
   throughput in Mbyte/s (1 Mbyte = 1048576 bytes). The cycle columns
   convert TIME using the clock frequency: cycles per byte, and cycles per
   BLOCK_SIZE bytes. */
267 display(const char *name, const char *mode, unsigned block_size,
270 printf("%18s %11s %7.2f",
272 BENCH_BLOCK / (time * 1048576.0));
275 printf(" %11.2f", time * frequency / BENCH_BLOCK);
277 printf(" %12.2f", time * frequency * block_size / BENCH_BLOCK);
/* Allocate SIZE bytes with malloc; an allocation failure is reported
   through die with the message below.
   NOTE(review): the header and the failure test are not visible here;
   presumably this is the xalloc helper used by the time_* functions. */
285 void *p = malloc(size);
287 die("Virtual memory exhausted.\n");
/* Measure the harness overhead by timing the bench_nothing callback, store
   it in the file-level overhead variable, and print it in microseconds
   and, scaled by the clock frequency, in cycles. */
295 overhead = time_function(bench_nothing, NULL);
296 printf("benchmark call overhead: %7f us", overhead * 1e6);
298 printf("%7.2f cycles\n", overhead * frequency);
/* Benchmark memxor and memxor3 for several operand alignments.
   The src and other buffers carry sizeof(long) extra bytes so that a
   pointer offset by a byte or two (see other + 1 and other + 2) still has
   BENCH_BLOCK bytes behind it. Every row reports sizeof(unsigned long)
   as its block size. */
307 struct bench_memxor_info info;
308 uint8_t src[BENCH_BLOCK + sizeof(long)];
309 uint8_t other[BENCH_BLOCK + sizeof(long)];
310 uint8_t dst[BENCH_BLOCK];
315 display ("memxor", "aligned", sizeof(unsigned long),
316 time_function(bench_memxor, &info));
318 display ("memxor", "unaligned", sizeof(unsigned long),
319 time_function(bench_memxor, &info));
323 display ("memxor3", "aligned", sizeof(unsigned long),
324 time_function(bench_memxor3, &info));
/* Offset only the third operand by one byte. */
326 info.other = other + 1;
327 display ("memxor3", "unaligned01", sizeof(unsigned long),
328 time_function(bench_memxor3, &info));
330 display ("memxor3", "unaligned11", sizeof(unsigned long),
331 time_function(bench_memxor3, &info));
/* Move the third operand to a two-byte offset. */
332 info.other = other + 2;
333 display ("memxor3", "unaligned12", sizeof(unsigned long),
334 time_function(bench_memxor3, &info));
/* Benchmark the update function of HASH: allocate a context of
   hash->context_size bytes, initialize it with hash->init, time
   bench_hash on it and print the result as an "update" row. */
338 time_hash(const struct nettle_hash *hash)
340 static uint8_t data[BENCH_BLOCK];
341 struct bench_hash_info info;
343 info.ctx = xalloc(hash->context_size);
344 info.update = hash->update;
348 hash->init(info.ctx);
350 display(hash->name, "update", hash->block_size,
351 time_function(bench_hash, &info));
/* Benchmark the update functions of the four UMAC variants. Each context
   is keyed with its set_key function, and the variant's update function
   is cast to the generic nettle_hash_update_func type so that the shared
   bench_hash callback can drive it. All rows use UMAC_DATA_SIZE as the
   block size. */
359 static uint8_t data[BENCH_BLOCK];
360 struct bench_hash_info info;
361 struct umac32_ctx ctx32;
362 struct umac64_ctx ctx64;
363 struct umac96_ctx ctx96;
364 struct umac128_ctx ctx128;
368 umac32_set_key (&ctx32, key);
370 info.update = (nettle_hash_update_func *) umac32_update;
373 display("umac32", "update", UMAC_DATA_SIZE,
374 time_function(bench_hash, &info));
376 umac64_set_key (&ctx64, key);
378 info.update = (nettle_hash_update_func *) umac64_update;
381 display("umac64", "update", UMAC_DATA_SIZE,
382 time_function(bench_hash, &info));
384 umac96_set_key (&ctx96, key);
386 info.update = (nettle_hash_update_func *) umac96_update;
389 display("umac96", "update", UMAC_DATA_SIZE,
390 time_function(bench_hash, &info));
392 umac128_set_key (&ctx128, key);
394 info.update = (nettle_hash_update_func *) umac128_update;
397 display("umac128", "update", UMAC_DATA_SIZE,
398 time_function(bench_hash, &info));
/* Benchmark GCM with AES: the update function is timed through the
   bench_hash callback, and encrypt and decrypt through the bench_cipher
   callback. The gcm_aes functions are cast to the generic function types
   those callbacks expect. All rows use GCM_BLOCK_SIZE as the block size. */
404 static uint8_t data[BENCH_BLOCK];
405 struct bench_hash_info hinfo;
406 struct bench_cipher_info cinfo;
407 struct gcm_aes_ctx ctx;
410 uint8_t iv[GCM_IV_SIZE];
412 gcm_aes_set_key(&ctx, sizeof(key), key);
413 gcm_aes_set_iv(&ctx, sizeof(iv), iv);
416 hinfo.update = (nettle_hash_update_func *) gcm_aes_update;
419 display("gcm-aes", "update", GCM_BLOCK_SIZE,
420 time_function(bench_hash, &hinfo));
423 cinfo.crypt = (nettle_crypt_func *) gcm_aes_encrypt;
426 display("gcm-aes", "encrypt", GCM_BLOCK_SIZE,
427 time_function(bench_cipher, &cinfo));
429 cinfo.crypt = (nettle_crypt_func *) gcm_aes_decrypt;
431 display("gcm-aes", "decrypt", GCM_BLOCK_SIZE,
432 time_function(bench_cipher, &cinfo));
/* Compare S with PREFIX character by character, for as long as PREFIX has
   characters left, looking for the first mismatch. block_cipher_p uses it
   to recognize cipher names starting with "openssl".
   NOTE(review): the return statements are not visible here; presumably
   the result is nonzero exactly when S starts with PREFIX. */
436 prefix_p(const char *prefix, const char *s)
439 for (i = 0; prefix[i]; i++)
440 if (prefix[i] != s[i])
/* Nonzero when CIPHER should also be benchmarked in the CBC and CTR modes:
   it must have a positive block size and a name that does not start with
   "openssl". */
446 block_cipher_p(const struct nettle_cipher *cipher)
448 /* Don't use nettle cbc and ctr for openssl ciphers. */
449 return cipher->block_size > 0 && !prefix_p("openssl", cipher->name);
/* Benchmark CIPHER and print one row per mode: ECB encrypt and ECB decrypt
   always, and CBC encrypt, CBC decrypt and CTR when block_cipher_p accepts
   the cipher.

   The IV is a heap buffer of cipher->block_size bytes, so it is cleared
   with that size. sizeof(iv) would only be the size of the pointer and
   would leave part of the IV uninitialized for larger block sizes. */
453 time_cipher(const struct nettle_cipher *cipher)
455 void *ctx = xalloc(cipher->context_size);
456 uint8_t *key = xalloc(cipher->key_size);
458 static uint8_t data[BENCH_BLOCK];
465 /* Decent initializers are a GNU extension, so don't use them here. */
466 struct bench_cipher_info info;
468 info.crypt = cipher->encrypt;
471 init_key(cipher->key_size, key);
472 cipher->set_encrypt_key(ctx, cipher->key_size, key);
474 display(cipher->name, "ECB encrypt", cipher->block_size,
475 time_function(bench_cipher, &info));
479 struct bench_cipher_info info;
481 info.crypt = cipher->decrypt;
484 init_key(cipher->key_size, key);
485 cipher->set_decrypt_key(ctx, cipher->key_size, key);
487 display(cipher->name, "ECB decrypt", cipher->block_size,
488 time_function(bench_cipher, &info));
/* The chained modes below are only run for ciphers accepted by
   block_cipher_p. */
491 if (block_cipher_p(cipher))
493 uint8_t *iv = xalloc(cipher->block_size);
497 struct bench_cbc_info info;
499 info.crypt = cipher->encrypt;
501 info.block_size = cipher->block_size;
504 memset(iv, 0, cipher->block_size);
506 cipher->set_encrypt_key(ctx, cipher->key_size, key);
508 display(cipher->name, "CBC encrypt", cipher->block_size,
509 time_function(bench_cbc_encrypt, &info));
513 struct bench_cbc_info info;
515 info.crypt = cipher->decrypt;
517 info.block_size = cipher->block_size;
520 memset(iv, 0, cipher->block_size);
522 cipher->set_decrypt_key(ctx, cipher->key_size, key);
524 display(cipher->name, "CBC decrypt", cipher->block_size,
525 time_function(bench_cbc_decrypt, &info));
/* CTR mode uses the encrypt function and the encryption key. */
530 struct bench_cbc_info info;
532 info.crypt = cipher->encrypt;
534 info.block_size = cipher->block_size;
537 memset(iv, 0, cipher->block_size);
539 cipher->set_encrypt_key(ctx, cipher->key_size, key);
541 display(cipher->name, "CTR", cipher->block_size,
542 time_function(bench_ctr, &info));
551 /* Try to get accurate cycle times for assembler functions. */
552 #if WITH_CYCLE_COUNTER
/* qsort comparison callback: AP and BP point to the two doubles being
   compared. */
554 compare_double(const void *ap, const void *bp)
556 double a = *(const double *) ap;
557 double b = *(const double *) bp;
/* TIME_CYCLES(t, code): take 5 samples, each bracketing a loop that runs
   up to BENCH_ITERATIONS with two cycle-counter reads. The counter is read
   as 32-bit high and low halves; the difference is formed with a manual
   borrow from the high half and then combined into a double. The samples
   are sorted and T receives the middle one (the median) divided by
   BENCH_ITERATIONS. */
566 #define TIME_CYCLES(t, code) do { \
567 double tc_count[5]; \
568 uint32_t tc_start_lo, tc_start_hi, tc_end_lo, tc_end_hi; \
569 unsigned tc_i, tc_j; \
570 for (tc_j = 0; tc_j < 5; tc_j++) \
573 GET_CYCLE_COUNTER(tc_start_hi, tc_start_lo); \
574 for (; tc_i < BENCH_ITERATIONS; tc_i++) \
577 GET_CYCLE_COUNTER(tc_end_hi, tc_end_lo); \
579 tc_end_hi -= (tc_start_hi + (tc_start_lo > tc_end_lo)); \
580 tc_end_lo -= tc_start_lo; \
582 tc_count[tc_j] = ldexp(tc_end_hi, 32) + tc_end_lo; \
584 qsort(tc_count, 5, sizeof(double), compare_double); \
585 (t) = tc_count[2] / BENCH_ITERATIONS; \
/* Measure _nettle_sha1_compress with TIME_CYCLES and print the cycle
   count per call. */
589 bench_sha1_compress(void)
591 uint32_t state[_SHA1_DIGEST_LENGTH];
592 uint8_t data[SHA1_DATA_SIZE];
595 TIME_CYCLES (t, _nettle_sha1_compress(state, data));
597 printf("sha1_compress: %.2f cycles\n", t);
/* Measure _nettle_salsa20_core with TIME_CYCLES and print the cycle count
   per call. The call passes 20 as its last argument and uses the same
   state array for its first two arguments. */
601 bench_salsa20_core(void)
603 uint32_t state[_SALSA20_INPUT_LENGTH];
606 TIME_CYCLES (t, _nettle_salsa20_core(state, state, 20));
607 printf("salsa20_core: %.2f cycles\n", t);
/* Measure sha3_permute with TIME_CYCLES and print the cycle count per
   call, together with that count divided by 24 as a per-round figure. */
611 bench_sha3_permute(void)
613 struct sha3_state state;
616 TIME_CYCLES (t, sha3_permute (&state));
617 printf("sha3_permute: %.2f cycles (%.2f / round)\n", t, t / 24.0);
/* Empty replacements, so that the calls in main expand to nothing;
   presumably selected when WITH_CYCLE_COUNTER is 0. */
620 #define bench_sha1_compress()
621 #define bench_salsa20_core()
622 #define bench_sha3_permute()
/* Expands to its argument followed by a comma, so an OPENSSL(...) entry in
   an initializer list contributes one element and its own separator. */
626 # define OPENSSL(x) x,
632 main(int argc, char **argv)
638 const struct nettle_hash *hashes[] =
640 &nettle_md2, &nettle_md4, &nettle_md5,
641 OPENSSL(&nettle_openssl_md5)
642 &nettle_sha1, OPENSSL(&nettle_openssl_sha1)
643 &nettle_sha224, &nettle_sha256,
644 &nettle_sha384, &nettle_sha512,
645 &nettle_sha3_224, &nettle_sha3_256,
646 &nettle_sha3_384, &nettle_sha3_512,
647 &nettle_ripemd160, &nettle_gosthash94,
651 const struct nettle_cipher *ciphers[] =
653 &nettle_aes128, &nettle_aes192, &nettle_aes256,
654 OPENSSL(&nettle_openssl_aes128)
655 OPENSSL(&nettle_openssl_aes192)
656 OPENSSL(&nettle_openssl_aes256)
657 &nettle_arcfour128, OPENSSL(&nettle_openssl_arcfour128)
658 &nettle_blowfish128, OPENSSL(&nettle_openssl_blowfish128)
659 &nettle_camellia128, &nettle_camellia192, &nettle_camellia256,
660 &nettle_cast128, OPENSSL(&nettle_openssl_cast128)
661 &nettle_des, OPENSSL(&nettle_openssl_des)
664 &nettle_twofish128, &nettle_twofish192, &nettle_twofish256,
665 &nettle_salsa20, &nettle_salsa20r12,
669 enum { OPT_HELP = 300 };
670 static const struct option options[] =
672 /* Name, args, flag, val */
673 { "help", no_argument, NULL, OPT_HELP },
674 { "clock-frequency", required_argument, NULL, 'f' },
678 while ( (c = getopt_long(argc, argv, "f:", options, NULL)) != -1)
682 frequency = atof(optarg);
687 printf("Usage: nettle-benchmark [-f clock frequency] [alg]\n");
700 bench_sha1_compress();
701 bench_salsa20_core();
702 bench_sha3_permute();
708 if (!alg || strstr ("memxor", alg))
714 for (i = 0; hashes[i]; i++)
715 if (!alg || strstr(hashes[i]->name, alg))
716 time_hash(hashes[i]);
718 if (!alg || strstr ("umac", alg))
721 for (i = 0; ciphers[i]; i++)
722 if (!alg || strstr(ciphers[i]->name, alg))
723 time_cipher(ciphers[i]);
725 if (!alg || strstr ("gcm", alg))