check_PROGRAMS = \
- src/tests/test_fastlz_pos \
+ src/tests/test_compression_common \
src/tests/test_fastlz_neg \
src/tests/fuzz_logprint \
src/tests/test_libdlog_container_warning \
TESTS = $(check_PROGRAMS) \
src/tests/lcov.sh # doesn't test anything, but generates a test summary
-src_tests_test_fastlz_pos_SOURCES = external/fastlz/fastlz.c src/tests/test_fastlz_pos.c
-src_tests_test_fastlz_pos_CFLAGS = $(check_CFLAGS)
-src_tests_test_fastlz_pos_LDFLAGS = $(AM_LDFLAGS)
+src_tests_test_compression_common_SOURCES = src/tests/test_compression_common.c src/tests/fastlz_test.c external/fastlz/fastlz.c
+src_tests_test_compression_common_CFLAGS = $(check_CFLAGS)
+src_tests_test_compression_common_LDFLAGS = $(AM_LDFLAGS)
src_tests_test_fastlz_neg_SOURCES = external/fastlz/fastlz.c src/tests/test_fastlz_neg.c
src_tests_test_fastlz_neg_CFLAGS = $(check_CFLAGS)
#define NSEC_PER_SEC 1000000000
#define NSEC_PER_MSEC 1000000
#define USEC_PER_MSEC 1000
+#define MSEC_PER_SEC 1000
#ifdef __cplusplus
extern "C" {
--- /dev/null
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "logcommon.h"
+
+#include "fastlz.h"
+#include "fastlz_test.h"
+#include "test_compression_common.h"
+
+/* Benchmarks fastlz compression of the `in_size` bytes at `in`.
+ *
+ * Allocates a struct fastlz_compressed together with its output buffer,
+ * runs WARMUP_CALLS untimed compressions followed by ACTUAL_CALLS timed
+ * ones, and hands the result to the caller through `*out`.
+ * Both allocations are released by fastlz_decompress_test().
+ *
+ * Returns the CLOCK_MONOTONIC time taken by the whole timed loop
+ * (all ACTUAL_CALLS iterations together, not a per-call average). */
+static struct timespec fastlz_compress_test(char *in, size_t in_size, struct common_compressed **out)
+{
+	struct fastlz_compressed *const compressed = malloc(sizeof *compressed);
+	assert(compressed);
+
+	compressed->size_original = in_size;
+	compressed->common.size_compressed = 0; // well-defined even if no compression call runs
+	compressed->data = malloc(FASTLZ_NEEDED_OUTPUT_SIZE(in_size));
+	assert(compressed->data);
+
+	for (size_t i = 0; i < WARMUP_CALLS; ++i)
+		compressed->common.size_compressed = fastlz_compress(in, in_size, compressed->data);
+
+	struct timespec begin;
+	clock_gettime(CLOCK_MONOTONIC, &begin);
+
+	/* Record the size from the measured calls as well, so the reported
+	 * result does not depend on WARMUP_CALLS being non-zero. */
+	for (size_t i = 0; i < ACTUAL_CALLS; ++i)
+		compressed->common.size_compressed = fastlz_compress(in, in_size, compressed->data);
+
+	struct timespec end;
+	clock_gettime(CLOCK_MONOTONIC, &end);
+
+	// `common` is the first member, so this is the same address as `compressed`.
+	*out = &compressed->common;
+
+	// NOTE(review): subtract_timespec() is expected to leave (end - begin) in `end` - confirm in logcommon.
+	subtract_timespec(&end, &begin);
+	return end;
+}
+
+/* Benchmarks fastlz decompression of a result produced by fastlz_compress_test().
+ *
+ * Allocates a buffer of the original (uncompressed) size, returned through
+ * `*re` / `*re_size`; the caller frees `*re`. Runs WARMUP_CALLS untimed
+ * decompressions followed by ACTUAL_CALLS timed ones, then releases `in`
+ * and its data buffer, so `in` must not be used after this call.
+ *
+ * Returns the CLOCK_MONOTONIC time taken by the whole timed loop. */
+static struct timespec fastlz_decompress_test(struct common_compressed *in, char **re, size_t *re_size)
+{
+	struct fastlz_compressed *const compressed = (struct fastlz_compressed *)in;
+	*re_size = compressed->size_original;
+	*re = malloc(compressed->size_original);
+	assert(*re);
+
+	for (size_t i = 0; i < WARMUP_CALLS; ++i)
+		fastlz_decompress(compressed->data, in->size_compressed, *re, *re_size);
+
+	struct timespec begin;
+	clock_gettime(CLOCK_MONOTONIC, &begin);
+
+	/* Keep the size reported by the decompressor; it is verified after the
+	 * clock is stopped so that the check does not skew the measurement. */
+	size_t restored = *re_size;
+	for (size_t i = 0; i < ACTUAL_CALLS; ++i)
+		restored = fastlz_decompress(compressed->data, in->size_compressed, *re, *re_size);
+
+	struct timespec end;
+	clock_gettime(CLOCK_MONOTONIC, &end);
+
+	// A failed or truncated decompression must not pass as a successful run.
+	assert(restored == *re_size);
+	(void) restored; // silences the unused warning when NDEBUG removes the assert
+
+	free(compressed->data);
+	free(compressed);
+
+	// NOTE(review): subtract_timespec() is expected to leave (end - begin) in `end` - confirm in logcommon.
+	subtract_timespec(&end, &begin);
+	return end;
+}
+
+/* The fastlz entry for the compression benchmark: pairs the timed
+ * compression and decompression wrappers defined in this file. */
+struct test_algo fastlz = {
+ .comp = fastlz_compress_test,
+ .decomp = fastlz_decompress_test,
+};
--- /dev/null
+#ifndef FASTLZ_TEST_H
+#define FASTLZ_TEST_H
+
+#include "test_compression_common.h"
+
+extern struct test_algo fastlz;
+
+/* Result of a fastlz compression run.
+ * `common` has to stay the first member: fastlz_test.c converts between
+ * struct fastlz_compressed * and struct common_compressed * with plain casts. */
+struct fastlz_compressed {
+ struct common_compressed common;
+ char *data; // buffer holding the compressed bytes
+ size_t size_original; // size of the uncompressed input, in bytes
+};
+
+#endif
\ No newline at end of file
--- /dev/null
+/* MIT License
+ *
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is furnished
+ * to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE. */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "logcommon.h"
+#include "test_compression_common.h"
+#include "fastlz_test.h"
+
+// The strings to compress.
+// repeated_string and garbage_string are used by gen_repeated_string(),
+// fuzzable_string is the base pattern for gen_fuzzy_string().
+static const char *const repeated_string = "Did you ever hear the tragedy of darth plagueis the wise?";
+static const char *const fuzzable_string = "Ala ma kota";
+static const char *const garbage_string = "fdffdfdfddfdfdfdd";
+
+/* Overwrites one randomly chosen byte of str[0 .. size) with a random
+ * lowercase letter. `size` must be non-zero. */
+void modify_one_letter(char *str, size_t size)
+{
+	assert(size > 0); // rand() % 0 would be a division by zero
+
+	/* Two statements on purpose: with both rand() calls in one expression
+	 * their order is unspecified, which made the output compiler-dependent. */
+	const size_t victim = rand() % size;
+	str[victim] = 'a' + rand() % 26;
+}
+
+/* Fills `in` with as many whole copies of `source` as fit into `size` bytes;
+ * any leftover bytes past the last whole copy are not touched and no
+ * terminator is written. When `apply_fuzzing` is set, one random letter
+ * of each copy is replaced. */
+void copy_string_repeatedly(char *in, size_t size, const char *source, bool apply_fuzzing)
+{
+	const size_t chunk = strlen(source);
+	char *dst = in;
+
+	for (size_t left = size / chunk; left > 0; --left, dst += chunk) {
+		memcpy(dst, source, chunk);
+		if (apply_fuzzing)
+			modify_one_letter(dst, chunk);
+	}
+}
+
+/* Generator: fills all `size` bytes of `in` with zero bytes. */
+void gen_zeroes(char *in, size_t size)
+{
+	memset(in, '\0', size);
+}
+
+/* Generator: fills `in` with `size` random lowercase letters,
+ * each drawn as 'a' + rand() % 26. */
+void gen_alpha(char *in, size_t size)
+{
+	char *const end = in + size;
+
+	for (char *p = in; p != end; ++p)
+		*p = 'a' + rand() % 26;
+}
+
+/* Generator: fills `in` with `size` random bytes that are each 0 or 1,
+ * i.e. something like 010100101000111...
+ * The values are whole bytes, not bits; the original author notes that
+ * the algorithm works at byte level, so the bytes being mostly made of
+ * zero-bits doesn't matter much. */
+void gen_01(char *in, size_t size)
+{
+	char *cursor = in;
+	size_t remaining = size;
+
+	while (remaining-- > 0)
+		*cursor++ = rand() % 2;
+}
+
+/* Generator: a string repeated over and over, disrupted near the end
+ * by a bit of garbage and followed by one more copy (RRRRRRGR).
+ * Bytes after the final copy stay zero. */
+void gen_repeated_string(char *in, size_t size)
+{
+	/* Start from an all-zero buffer: the strcat() calls below need a
+	 * terminator right after the repeated part. */
+	memset(in, '\0', size);
+
+	/* Room kept free for the garbage, the closing copy and the terminator.
+	 * NOTE(review): in C, sizeof '\0' is sizeof(int) rather than 1, so this
+	 * reserves a few bytes more than strictly needed; harmless here. */
+	const size_t tail_len = strlen(garbage_string) + strlen(repeated_string);
+	const size_t extras = tail_len + sizeof '\0';
+	assert(size >= extras);
+
+	copy_string_repeatedly(in, size - extras, repeated_string, false);
+	strcat(in, garbage_string);
+	strcat(in, repeated_string);
+}
+
+/* Generator: a slightly corrupted repeating pattern. Every copy of the
+ * base string gets one random letter replaced, for example with the
+ * base of "Ala ma kota":
+ *
+ * Ula ma kota
+ * Ala ma bota
+ * Ala ma kopa
+ * Ada ma kota
+ * Ala ma koty
+ * Ala ma kuta
+ * Ale ma kota
+ * Ala da kota
+ *
+ * `size` is expected to be at least the length of the base string. */
+void gen_fuzzy_string(char *in, size_t size)
+{
+	const size_t pattern_len = strlen(fuzzable_string);
+
+	/* Whole copies rarely fill the buffer exactly, so clear the tail first;
+	 * whatever the copies do not overwrite stays zero. */
+	char *const tail = in + size - pattern_len;
+	gen_zeroes(tail, pattern_len);
+
+	copy_string_repeatedly(in, size, fuzzable_string, true);
+}
+
+/* Generator: fills `in` with `size` random uppercase letters, weighted
+ * by their ~actual English frequencies. Each letter is picked with
+ * probability freq / total, where total is the sum of all weights. */
+void gen_weighted_alpha(char *in, size_t size)
+{
+	static const struct letter {
+		char letter;
+		size_t freq;
+	} letters[] = {
+		{'E', 13},
+		{'T', 10},
+		{'A', 8},
+		{'O', 8},
+		{'I', 7},
+		{'N', 7},
+
+		{'S', 6},
+		{'H', 6},
+		{'R', 6},
+		{'D', 5},
+		{'L', 4},
+		{'U', 3},
+
+		{'C', 3},
+		{'M', 3},
+		{'W', 2},
+		{'F', 2},
+		{'G', 2},
+		{'Y', 2},
+		{'P', 2},
+		{'B', 1},
+		{'V', 1},
+		{'K', 1},
+	};
+
+	size_t total = 0; // ideally this would be compile-time but we're not C++
+	for (size_t i = 0; i < NELEMS(letters); ++i)
+		total += letters[i].freq;
+
+	for (size_t i = 0; i < size; ++i) {
+		/* rnd is in [0, total); walk the table until it falls inside a
+		 * letter's slice. The comparison has to be >=: a letter of weight
+		 * w owns exactly w values, 0 .. w-1. With a plain > the first
+		 * letter got one value too many and the last one was never picked.
+		 * The walk always stops inside the table because rnd < total. */
+		size_t rnd = rand() % total;
+		size_t j = 0;
+		while (rnd >= letters[j].freq) {
+			rnd -= letters[j].freq;
+			++j;
+		}
+		in[i] = letters[j].letter;
+	}
+}
+
+/* Signature shared by the gen_*() input generators:
+ * fill the given buffer with the given number of bytes. */
+typedef void (*generator_t)(char *, size_t);
+
+/* Runs one benchmark case: generates `size` bytes of input with `gen`,
+ * compresses and decompresses them with `algo`, verifies that the round
+ * trip reproduces the input exactly, and prints the timings and the
+ * compression ratio. `algo_name` and `gen_name` are only used as labels
+ * in the printed report. */
+void test_via_generator(generator_t gen, size_t size, const struct test_algo *algo, const char *algo_name, const char *gen_name)
+{
+	char *const in = malloc(size);
+	assert(in);
+	gen(in, size);
+
+	struct common_compressed *out;
+	struct timespec ts_comp = algo->comp(in, size, &out);
+	assert(out->size_compressed);
+
+	/* Remember the size now: `out` is handed over to decomp() below
+	 * and is not touched afterwards. */
+	const size_t comp_size = out->size_compressed;
+
+	char *re;
+	size_t re_size;
+	struct timespec ts_decomp = algo->decomp(out, &re, &re_size);
+
+	/* The round trip must give back the whole input. Without the size
+	 * check a shorter result would only be compared as a prefix, and a
+	 * longer one would make the comparison read past the end of `in`. */
+	assert(re_size == size);
+	assert(memcmp(re, in, size) == 0);
+	free(in);
+	free(re);
+
+	/* Turn the totals over ACTUAL_CALLS runs into a per-call time in
+	 * milliseconds. NOTE(review): this presumably relies on
+	 * multiply_timespec() scaling the whole value, so that tv_sec ends up
+	 * holding whole milliseconds - confirm in logcommon. */
+	multiply_timespec(&ts_comp  , (double) MSEC_PER_SEC / ACTUAL_CALLS);
+	multiply_timespec(&ts_decomp, (double) MSEC_PER_SEC / ACTUAL_CALLS);
+
+	printf("%s / %s / %zu\n"
+	       "\t  compressed in %d.%02dms\n"
+	       "\tdecompressed in %d.%02dms\n"
+	       "\tcompressed into %zu bytes (%.1f%%)\n"
+	       , algo_name, gen_name, size
+	       , (int) ts_comp.tv_sec, (int) ts_comp.tv_nsec / (NSEC_PER_SEC / 100)
+	       , (int) ts_decomp.tv_sec, (int) ts_decomp.tv_nsec / (NSEC_PER_SEC / 100)
+	       , comp_size, 100.f * comp_size / size
+	);
+}
+
+/* Runs every generator with every algorithm at every input size.
+ * Each case prints its own report; a failed check aborts via assert(). */
+int main()
+{
+#ifdef ASAN_BUILD
+	/* NB: The following test fails when ASAN is enabled, with a supposed memory
+	 * violation in fastlz. This does not spark joy and we should probably fix this.
+	 * However, we have more urgent things to do right now, so let's disable
+	 * the test on ASAN for now. */
+
+	return EXIT_SKIP;
+#endif /* ASAN_BUILD */
+
+	// Input sizes, in bytes.
+	static const int sizes[] = {
+		4096, // max single log payload
+		65530, // just below 2^16, which may be a behaviour change threshold for some algos
+		65540, // just above 2^16
+		131072, // the size of the buffer used by dlog_logger
+	};
+
+	// Algorithms under test, each labelled with its own identifier.
+	static const struct {
+		const struct test_algo *algo;
+		const char *name;
+	} algos[] = {
+		#define ALGO_ENTRY(x) { .algo = &x, .name = #x }
+		ALGO_ENTRY(fastlz),
+		#undef ALGO_ENTRY
+	};
+
+	// Input generators; the label is the function name without the gen_ prefix.
+	static const struct {
+		const generator_t gen;
+		const char *name;
+	} gens[] = {
+		#define GEN_ENTRY(x) { .gen = gen_ ## x, .name = #x }
+		GEN_ENTRY(zeroes),
+		GEN_ENTRY(01),
+		GEN_ENTRY(alpha),
+		GEN_ENTRY(repeated_string),
+		GEN_ENTRY(fuzzy_string),
+		GEN_ENTRY(weighted_alpha),
+		#undef GEN_ENTRY
+	};
+
+	for (size_t si = 0; si < NELEMS(sizes); ++si) {
+		for (size_t gi = 0; gi < NELEMS(gens); ++gi) {
+			for (size_t ai = 0; ai < NELEMS(algos); ++ai) {
+				test_via_generator(gens[gi].gen, sizes[si],
+					algos[ai].algo, algos[ai].name, gens[gi].name);
+			}
+		}
+	}
+
+	return EXIT_SUCCESS;
+}
--- /dev/null
+#ifndef TEST_COMPRESSION_COMMON_H
+#define TEST_COMPRESSION_COMMON_H
+
+#include <stddef.h>
+#include <time.h>
+
+#define WARMUP_CALLS 10
+#define ACTUAL_CALLS 100
+
+/* Part of a compression result that is common to all algorithms.
+ * Algorithm-specific result structs embed it (see struct fastlz_compressed),
+ * and the generic test code only ever looks at this part. */
+struct common_compressed {
+ size_t size_compressed; // compressed size in bytes; the test asserts it is non-zero
+};
+
+/* One compression algorithm as seen by the benchmark.
+ * Both callbacks return a struct timespec holding their measured run time.
+ *
+ * comp:   compresses the `in_size` bytes at `in` and stores a newly
+ *         allocated result in `*out`.
+ * decomp: decompresses `in` into a newly allocated buffer returned through
+ *         `*out` / `*out_size`. The generic test code frees that buffer but
+ *         never frees `in`, so decomp is expected to release it
+ *         (the fastlz implementation does). */
+struct test_algo {
+ struct timespec (*comp)(char *in, size_t in_size, struct common_compressed **out);
+ struct timespec (*decomp)(struct common_compressed *in, char **out, size_t *out_size);
+};
+
+#endif
+++ /dev/null
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-#include <unistd.h>
-
-#include "logcommon.h" // for NELEMS
-#include "fastlz.h"
-
-// The string to compress.
-static const char *duplicate_string = "Did you ever hear the tragedy of darth plagueis the wise?";
-static const char *fuzzable_string = "Ala ma kota";
-static const char *random_string = "fdffdfdfddfdfdfdd";
-
-size_t wrapper_compress(char *in, size_t size_in, char *out)
-{
- clock_t begin = clock();
- size_t ret = fastlz_compress(in, size_in, out);
- clock_t end = clock();
- double timer = (double)(end - begin) / CLOCKS_PER_SEC;
- printf("Comp time spent: %lf, size:%zu, size_compressed:%zu\n", timer, size_in, ret);
- return ret;
-}
-
-/* in - compressed data | out - output */
-size_t wrapper_decompress(char *in, size_t size_in, char *out, size_t size_out)
-{
- clock_t begin = clock();
- size_t ret = fastlz_decompress(in, size_in, out, size_out);
- clock_t end = clock();
- double timer = (double)(end - begin) / CLOCKS_PER_SEC;
- printf("Decomp time spent: %lf, size:%zu, size_decompressed:%zu\n", timer, size_in, ret);
- return ret;
-}
-
-void modify_one_letter(char *str, size_t begin, size_t end)
-{
- str[begin + rand() % (end - begin)] = 'a' + rand() % 26;
-}
-
-char *copy_string(size_t calloc_size, size_t copy_size, const char *source, bool apply_fuzzing)
-{
- const size_t source_len = strlen(source);
-
- assert(source);
- assert(copy_size > 0);
- assert(calloc_size > 0);
- assert(calloc_size >= copy_size);
-
- int repeats_num = copy_size / source_len;
- assert(repeats_num > 0);
-
- char *result = calloc(1, calloc_size);
- assert(result);
-
- for (int i = 0; i < repeats_num; ++i) {
- memcpy(result + i * source_len, source, source_len);
- if (apply_fuzzing)
- modify_one_letter(result, i * source_len, (i + 1) * source_len);
- }
-
- return result;
-}
-
-void test_duplicate_string(size_t size)
-{
- /* place for "ffdfdfd", one closing sentence and '\0'*/
- const size_t extras = sizeof '\0' + strlen(random_string) + strlen(duplicate_string);
- assert(size >= extras);
- size_t temp_size = size;
- temp_size -= extras;
- char *in = copy_string(size, temp_size, duplicate_string, false);
-
- assert(in);
-
- strcat(in, random_string);
- strcat(in, duplicate_string);
-
- size_t src_len = strlen(in);
- char *out = malloc(FASTLZ_NEEDED_OUTPUT_SIZE(src_len));
- char *re = malloc(src_len);
-
- assert(out);
- assert(re);
-
- size_t compressed_size = wrapper_compress(in, src_len, out);
- assert(compressed_size);
-
- size_t restored_size = wrapper_decompress(out, compressed_size, re, src_len);
- assert(restored_size == src_len);
- for (size_t i = 0; i < src_len; ++i)
- assert(re[i] == in[i]);
-
- free(out);
- free(re);
- free(in);
-}
-
-void test_zeroes(size_t size)
-{
- char *in = calloc(size, 1);
- char *out = malloc(FASTLZ_NEEDED_OUTPUT_SIZE(size));
- char *re = malloc(size);
-
- assert(in);
- assert(out);
- assert(re);
-
- size_t compressed_size = wrapper_compress(in, size, out);
- assert(compressed_size);
- assert(compressed_size < 1024); // literally just zeroes, shouldn't take more than 1kB ever
-
- size_t restored_size = wrapper_decompress(out, compressed_size, re, size);
- assert(restored_size == size);
- for (size_t i = 0; i < size; ++i)
- assert(re[i] == in[i]);
-
- free(in);
- free(out);
- free(re);
-}
-
-void test_01(size_t size)
-{
- char *in = malloc(size);
- char *out = malloc(FASTLZ_NEEDED_OUTPUT_SIZE(size));
- char *re = malloc(size);
-
- assert(in);
- assert(out);
- assert(re);
-
- /* Produce something like 010100101000111...
- * Note that the algorithm works at the level
- * of bytes, not bits, so these being mostly
- * composed of zero-bits doesn't matter much. */
- for (size_t i = 0; i < size; ++i)
- in[i] = rand() % 2;
-
- size_t compressed_size = wrapper_compress(in, size, out);
- assert(compressed_size);
- assert(compressed_size < size * 0.52); // about half, plus a bit of overhead
-
- size_t restored_size = wrapper_decompress(out, compressed_size, re, size);
- assert(restored_size == size);
- for (size_t i = 0; i < size; ++i)
- assert(re[i] == in[i]);
-
- free(in);
- free(out);
- free(re);
-}
-
-void test_alpha_weighted(size_t size)
-{
- char *in = malloc(size);
- char *out = malloc(FASTLZ_NEEDED_OUTPUT_SIZE(size));
- char *re = malloc(size);
-
- assert(in);
- assert(out);
- assert(re);
-
- /* Produce a random string with letters weighted
- * by their ~actual English frequencies. */
- static const struct letter {
- char letter;
- size_t freq;
- } letters[] =
- {{'E', 13}
- ,{'T', 10}
- ,{'A', 8}
- ,{'O', 8}
- ,{'I', 7}
- ,{'N', 7}
-
- ,{'S', 6}
- ,{'H', 6}
- ,{'R', 6}
- ,{'D', 5}
- ,{'L', 4}
- ,{'U', 3}
-
- ,{'C', 3}
- ,{'M', 3}
- ,{'W', 2}
- ,{'F', 2}
- ,{'G', 2}
- ,{'Y', 2}
- ,{'P', 2}
- ,{'B', 1}
- ,{'V', 1}
- ,{'K', 1}
- };
-
- size_t total = 0;
- for (size_t i = 0; i < NELEMS(letters); ++i)
- total += letters[i].freq;
-
- for (size_t i = 0; i < size; ++i) {
- int rnd = rand() % total;
- int j = 0;
- while (rnd > letters[j].freq) {
- rnd -= letters[j].freq;
- ++j;
- }
- in[i] = letters[j].letter;
- }
-
- size_t compressed_size = wrapper_compress(in, size, out);
- assert(compressed_size);
-
- /* Suprisingly, fastlz can't really use the fact that the data is shaped like this.
- * Be happy if we get, say, 85% plus some overhead. */
- assert(compressed_size < (size_t) (size * 0.85) + 512);
-
- size_t restored_size = wrapper_decompress(out, compressed_size, re, size);
- assert(restored_size == size);
- for (size_t i = 0; i < size; ++i)
- assert(re[i] == in[i]);
-
- free(in);
- free(out);
- free(re);
-}
-
-void test_alpha(size_t size)
-{
- char *in = malloc(size);
- char *out = malloc(FASTLZ_NEEDED_OUTPUT_SIZE(size));
- char *re = malloc(size);
-
- assert(in);
- assert(out);
- assert(re);
-
- for (size_t i = 0; i < size; ++i)
- in[i] = 'a' + (rand() % 20);
-
- size_t compressed_size = wrapper_compress(in, size, out);
- assert(compressed_size);
-
- /* Very random data, not too compressible.
- * Be happy if we get, say, 85% plus some overhead. */
- assert(compressed_size < (size_t)(size * 0.85) + 512);
-
- size_t restored_size = wrapper_decompress(out, compressed_size, re, size);
- assert(restored_size == size);
- for (size_t i = 0; i < size; ++i)
- assert(re[i] == in[i]);
-
- free(in);
- free(out);
- free(re);
-}
-
-void test_fuzzy_string(size_t size)
-{
- char *in = copy_string(size, size, fuzzable_string, true);
- assert(in);
- size_t in_len = strlen(in);
-
- char *out = malloc(FASTLZ_NEEDED_OUTPUT_SIZE(in_len));
- char *re = malloc(in_len);
-
- assert(out);
- assert(re);
-
- size_t compressed_size = wrapper_compress(in, in_len, out);
- assert(compressed_size);
-
- size_t restored_size = wrapper_decompress(out, compressed_size, re, in_len);
- assert(restored_size == in_len);
- for (size_t i = 0; i < in_len; ++i)
- assert(re[i] == in[i]);
-
- free(out);
- free(in);
- free(re);
-}
-
-int main()
-{
-#ifdef ASAN_BUILD
- /* NB: The following tests fails when ASAN is enabled, with an supposed memory
- * violation in fastlz. This does not spark joy and we should probably fix this.
- * Hovever, we have more urgent things to do right now, so let's disable
- * the test on ASAN for now. */
-
- return EXIT_SKIP;
-#endif /* ASAN_BUILD */
-
- static const int sizes[] = {
- 4096, // a single log
- 65530, // just below the magic threshold which changes underlying algo
- 65540, // just above it
- 131072,
- };
-
- for (size_t i = 0; i < NELEMS(sizes); ++i) {
- const int size = sizes[i];
- test_zeroes(size);
- test_01(size);
- test_alpha_weighted(size);
- test_alpha(size);
- test_duplicate_string(size);
- test_fuzzy_string(size);
- }
-
- return EXIT_SUCCESS;
-}