Add some basic tests for FastLZ. 06/274106/5
author Michal Bloch <m.bloch@samsung.com>
Tue, 19 Apr 2022 08:03:06 +0000 (10:03 +0200)
committer Mateusz Majewski <m.majewski2@samsung.com>
Thu, 28 Apr 2022 10:39:43 +0000 (12:39 +0200)
Change-Id: I245de78a9b5b5d3fec80979351cc493700b4dbd3

Makefile.am
src/tests/test_fastlz_neg.c [new file with mode: 0644]
src/tests/test_fastlz_pos.c [new file with mode: 0644]

index 830517c..ab92e94 100644 (file)
@@ -430,6 +430,8 @@ test_libredirect_multi_SOURCES = \
 
 
 check_PROGRAMS = \
+       src/tests/test_fastlz_pos \
+       src/tests/test_fastlz_neg \
        src/tests/fuzz_logprint \
        src/tests/test_libdlog_container_warning \
        src/tests/test_logger_log_storage \
@@ -489,6 +491,14 @@ check_CFLAGS = $(AM_CFLAGS) -O0 -fprofile-arcs -DUNIT_TEST \
 TESTS = $(check_PROGRAMS) \
        src/tests/lcov.sh # doesn't test anything, but generates a test summary
 
+src_tests_test_fastlz_pos_SOURCES = external/fastlz/fastlz.c src/tests/test_fastlz_pos.c
+src_tests_test_fastlz_pos_CFLAGS = $(check_CFLAGS)
+src_tests_test_fastlz_pos_LDFLAGS = $(AM_LDFLAGS)
+
+src_tests_test_fastlz_neg_SOURCES = external/fastlz/fastlz.c src/tests/test_fastlz_neg.c
+src_tests_test_fastlz_neg_CFLAGS = $(check_CFLAGS)
+src_tests_test_fastlz_neg_LDFLAGS = $(AM_LDFLAGS)
+
 src_tests_fuzz_logprint_SOURCES = src/tests/fuzz_logprint.c src/shared/ptrs_list.c src/shared/logprint.c src/shared/logcommon.c src/shared/queued_entry.c src/shared/parsers.c src/shared/translate_syslog.c src/shared/queued_entry_timestamp.c
 src_tests_fuzz_logprint_CFLAGS = $(check_CFLAGS)
 src_tests_fuzz_logprint_LDFLAGS = $(AM_LDFLAGS)
diff --git a/src/tests/test_fastlz_neg.c b/src/tests/test_fastlz_neg.c
new file mode 100644 (file)
index 0000000..62abf92
--- /dev/null
@@ -0,0 +1,99 @@
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "logcommon.h" // for NELEMS
+#include "fastlz.h"
+
+void test_no_overflow()
+{
+#define ACTUAL_BUFFER_SIZE 1337
+#define SENTINEL_SIZE 69
+
+       /* Checks whether only the part of the restored
+        * buffer that was supplied is actually used.
+        *
+        * The restore buffer is SENTINEL_SIZE bytes longer than what the
+        * decompressor is told about; that tail is filled with a known
+        * pattern which has to be unchanged after decompression. */
+
+       char in[ACTUAL_BUFFER_SIZE] = {0,};
+       char out[FASTLZ_NEEDED_OUTPUT_SIZE(ACTUAL_BUFFER_SIZE)];
+       char re[ACTUAL_BUFFER_SIZE + SENTINEL_SIZE];
+
+       /* Same sanity check as in the sibling tests:
+        * stop right here if nothing was compressed. */
+       size_t compressed_size = (size_t) fastlz_compress(in, sizeof in, out);
+       assert(compressed_size > 0);
+
+       for (size_t i = 0; i < SENTINEL_SIZE; ++i)
+               re[ACTUAL_BUFFER_SIZE + i] = (char) i;
+
+       /* Only the first ACTUAL_BUFFER_SIZE bytes of `re` are offered as output space. */
+       int restored_size = fastlz_decompress(out, compressed_size, re, ACTUAL_BUFFER_SIZE);
+       assert(restored_size == ACTUAL_BUFFER_SIZE);
+
+       /* Index with the constant rather than the signed return value. */
+       for (size_t i = 0; i < SENTINEL_SIZE; ++i)
+               assert(re[ACTUAL_BUFFER_SIZE + i] == (char) i);
+
+#undef ACTUAL_BUFFER_SIZE
+#undef SENTINEL_SIZE
+}
+
+void test_garbage_decompression()
+{
+       /* Hands the decompressor bytes that were never produced by
+        * the compressor and expects the call to return 0. */
+
+       char not_compressed[] = "Uwaga! W danych znajduja sie (rzadkie; pozorne lub nie) anomalie i nie jest to blad w zadaniu.";
+       char restored[sizeof not_compressed + 64];
+
+       const int result = fastlz_decompress(not_compressed, sizeof not_compressed, restored, sizeof restored);
+       assert(result == 0);
+}
+
+void test_insufficient_decompress_buffer()
+{
+       /* Compresses a short string, then tries to restore it into
+        * a buffer that is 5 bytes shorter than the original;
+        * the decompression call is expected to return 0. */
+
+       char plain[] = "Kolorowy Morszczuk, Hex Q8, Mielizna, 4%";
+       char packed[66]; // a minimum that takes precedence over the +5% rule
+       char too_small[sizeof plain - 5];
+
+       const size_t packed_len = (size_t) fastlz_compress(plain, sizeof plain, packed);
+       assert(packed_len > 0);
+
+       const int result = fastlz_decompress(packed, packed_len, too_small, sizeof too_small);
+       assert(result == 0);
+}
+
+void test_keep_input_intact()
+{
+       /* Checks whether the input is kept intact:
+        * compressing must not modify the buffer it reads from. */
+
+       char in[] = "Niebieski Dorsz, Karaka, Baron Manfred of Twinford, Nowo Zwodowany, Hex G13 (Westmarch), Transport: Mithril, 77%";
+       char out[FASTLZ_NEEDED_OUTPUT_SIZE(sizeof in)];
+
+       /* Snapshot taken before compression, compared against afterwards. */
+       char in_original[sizeof in];
+       memcpy(in_original, in, sizeof in_original);
+
+       size_t compressed_size = (size_t) fastlz_compress(in, sizeof in, out);
+       assert(compressed_size > 0);
+
+       /* Every byte, including the terminating NUL, must be unchanged. */
+       assert(memcmp(in, in_original, sizeof in) == 0);
+}
+
+int main()
+{
+       /* The header specifies that the minimum input size is 16,
+        * but testing shows that smaller inputs exist that compress
+        * just fine, so there is no test for that.
+        *
+        * There is also the minimum +5% / 66 output buffer size,
+        * but the compressor does not actually check it (and will
+        * happily go past the end of the buffer to bulldoze what
+        * lies behind it), so we can't really test that either. */
+
+       /* Negative test cases, run in this order. */
+       void (*const cases[])() = {
+               test_no_overflow,
+               test_garbage_decompression,
+               test_insufficient_decompress_buffer,
+               test_keep_input_intact,
+       };
+
+       for (size_t idx = 0; idx < sizeof cases / sizeof cases[0]; ++idx)
+               cases[idx]();
+
+       return EXIT_SUCCESS;
+}
diff --git a/src/tests/test_fastlz_pos.c b/src/tests/test_fastlz_pos.c
new file mode 100644 (file)
index 0000000..3e2bc12
--- /dev/null
@@ -0,0 +1,194 @@
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "logcommon.h" // for NELEMS
+#include "fastlz.h"
+
+void test_zeroes(size_t size)
+{
+       /* Round-trips `size` zero bytes through compression and
+        * decompression and checks the result byte by byte. */
+
+       char *zeroes = calloc(size, 1);
+       assert(zeroes);
+
+       char *packed = malloc(FASTLZ_NEEDED_OUTPUT_SIZE(size));
+       assert(packed);
+
+       char *restored = malloc(size);
+       assert(restored);
+
+       const size_t packed_len = (size_t) fastlz_compress(zeroes, size, packed);
+       assert(packed_len);
+       assert(packed_len < 1024); // literally just zeroes, shouldn't take more than 1kB ever
+
+       const int restored_len = fastlz_decompress(packed, packed_len, restored, size);
+       assert(restored_len == size);
+
+       size_t pos = 0;
+       while (pos < size) {
+               assert(restored[pos] == zeroes[pos]);
+               ++pos;
+       }
+
+       free(restored);
+       free(packed);
+       free(zeroes);
+}
+
+void test_01(size_t size)
+{
+       /* Round-trips `size` random bytes that are each either 0 or 1
+        * and checks that they compress to roughly half the size. */
+
+       char *bits = malloc(size);
+       assert(bits);
+
+       char *packed = malloc(FASTLZ_NEEDED_OUTPUT_SIZE(size));
+       assert(packed);
+
+       char *restored = malloc(size);
+       assert(restored);
+
+       /* Produce something like 010100101000111...
+        * Note that the algorithm works at the level
+        * of bytes, not bits, so these being mostly
+        * composed of zero-bits doesn't matter much. */
+       for (char *cursor = bits; cursor != bits + size; ++cursor)
+               *cursor = rand() % 2;
+
+       const size_t packed_len = (size_t) fastlz_compress(bits, size, packed);
+       assert(packed_len);
+       assert(packed_len < size * 0.52); // about half, plus a bit of overhead
+
+       const int restored_len = fastlz_decompress(packed, packed_len, restored, size);
+       assert(restored_len == size);
+
+       size_t pos = 0;
+       while (pos < size) {
+               assert(restored[pos] == bits[pos]);
+               ++pos;
+       }
+
+       free(restored);
+       free(packed);
+       free(bits);
+}
+
+void test_alpha_weighted(size_t size)
+{
+       /* Round-trips `size` bytes of random upper-case letters, drawn
+        * according to the weight table below, and checks both the
+        * compression ratio and that the data is restored exactly. */
+
+       char *in = malloc(size);
+       char *out = malloc(FASTLZ_NEEDED_OUTPUT_SIZE(size));
+       char *re = malloc(size);
+
+       assert(in);
+       assert(out);
+       assert(re);
+
+       /* Produce a random string with letters weighted
+        * by their ~actual English frequencies. */
+       static const struct letter {
+               char letter;
+               size_t freq;
+       } letters[] =
+               {{'E', 13}
+               ,{'T', 10}
+               ,{'A',  8}
+               ,{'O',  8}
+               ,{'I',  7}
+               ,{'N',  7}
+
+               ,{'S',  6}
+               ,{'H',  6}
+               ,{'R',  6}
+               ,{'D',  5}
+               ,{'L',  4}
+               ,{'U',  3}
+
+               ,{'C',  3}
+               ,{'M',  3}
+               ,{'W',  2}
+               ,{'F',  2}
+               ,{'G',  2}
+               ,{'Y',  2}
+               ,{'P',  2}
+               ,{'B',  1}
+               ,{'V',  1}
+               ,{'K',  1}
+       };
+
+       size_t total = 0;
+       for (size_t i = 0; i < NELEMS(letters); ++i)
+               total += letters[i].freq;
+
+       for (size_t i = 0; i < size; ++i) {
+               /* rnd lies in [0, total). Walk the table, peeling off each
+                * letter's share until rnd falls inside one of them. The
+                * comparison has to be >=: with a plain > the first letter
+                * gets one slot too many and the last one is never picked.
+                * Since rnd < total, j always stays inside the table. */
+               size_t rnd = (size_t) rand() % total;
+               size_t j = 0;
+               while (rnd >= letters[j].freq) {
+                       rnd -= letters[j].freq;
+                       ++j;
+               }
+               in[i] = letters[j].letter;
+       }
+
+       size_t compressed_size = (size_t) fastlz_compress(in, size, out);
+       assert(compressed_size);
+
+       /* Surprisingly, fastlz can't really use the fact that the data is shaped like this.
+        * Be happy if we get, say, 85% plus some overhead. */
+       assert(compressed_size < (size_t) (size * 0.85) + 512);
+
+       int restored_size = fastlz_decompress(out, compressed_size, re, size);
+       assert(restored_size == size);
+       for (size_t i = 0; i < size; ++i)
+               assert(re[i] == in[i]);
+
+       free(in);
+       free(out);
+       free(re);
+}
+
+void test_alpha(size_t size)
+{
+       /* Round-trips `size` random bytes taken from the 20 letters
+        * starting at 'a' and checks the compression ratio. */
+
+       char *text = malloc(size);
+       assert(text);
+
+       char *packed = malloc(FASTLZ_NEEDED_OUTPUT_SIZE(size));
+       assert(packed);
+
+       char *restored = malloc(size);
+       assert(restored);
+
+       for (char *cursor = text; cursor != text + size; ++cursor)
+               *cursor = 'a' + (rand() % 20);
+
+       const size_t packed_len = (size_t) fastlz_compress(text, size, packed);
+       assert(packed_len);
+
+       /* Very random data, not too compressible.
+        * Be happy if we get, say, 85% plus some overhead. */
+       assert(packed_len < (size_t) (size * 0.85) + 512);
+
+       const int restored_len = fastlz_decompress(packed, packed_len, restored, size);
+       assert(restored_len == size);
+
+       size_t pos = 0;
+       while (pos < size) {
+               assert(restored[pos] == text[pos]);
+               ++pos;
+       }
+
+       free(restored);
+       free(packed);
+       free(text);
+}
+
+int main()
+{
+#ifdef ASAN_BUILD
+       /* NB: The following tests fail when ASAN is enabled, with a supposed memory
+        * violation in fastlz. This does not spark joy and we should probably fix this.
+        * However, we have more urgent things to do right now, so let's disable
+        * the test on ASAN for now. */
+
+       return EXIT_SKIP;
+#endif /* ASAN_BUILD */
+
+       /* Input sizes that every positive test is run with. */
+       static const int sizes[] = {
+                 4096, // a single log
+                65530, // just below the magic threshold which changes underlying algo
+                65540, // just above it
+               131072,
+       };
+
+       /* Keep the call order stable: the tests share the rand() sequence. */
+       for (const int *size = sizes; size != sizes + NELEMS(sizes); ++size) {
+               test_zeroes(*size);
+               test_01(*size);
+               test_alpha_weighted(*size);
+               test_alpha(*size);
+       }
+
+       return EXIT_SUCCESS;
+}