8 #include "logcommon.h" // for NELEMS
11 // Source strings the tests below use to build the data that gets compressed.
// duplicate_string: repeated by copy_string() and appended once more in test_duplicate_string().
12 static const char *duplicate_string = "Did you ever hear the tragedy of darth plagueis the wise?";
// fuzzable_string: repeated with fuzzing requested in test_fuzzy_string().
13 static const char *fuzzable_string = "Ala ma kota";
// random_string: appended after the repeated text in test_duplicate_string().
14 static const char *random_string = "fdffdfdfddfdfdfdd";
/*
 * Timed wrapper around fastlz_compress().
 *
 * in      - data to compress
 * size_in - number of bytes of `in` handed to fastlz_compress()
 * out     - buffer receiving the compressed data; callers in this file size it
 *           with FASTLZ_NEEDED_OUTPUT_SIZE()
 *
 * Prints the elapsed clock() time in seconds plus the input and compressed sizes.
 * NOTE(review): the braces and the return statement are not visible in this
 * listing; callers treat the result as the compressed size, so presumably
 * `ret` is returned - confirm.
 */
16 size_t wrapper_compress(char *in, size_t size_in, char *out)
18 clock_t begin = clock();
19 size_t ret = fastlz_compress(in, size_in, out);
20 clock_t end = clock();
// clock() ticks converted to seconds.
21 double timer = (double)(end - begin) / CLOCKS_PER_SEC;
22 printf("Comp time spent: %lf, size:%zu, size_compressed:%zu\n", timer, size_in, ret);
/*
 * Timed wrapper around fastlz_decompress().
 *
 * size_in  - number of compressed bytes in `in`
 * size_out - forwarded to fastlz_decompress(); callers in this file pass the
 *            allocated size of `out` (presumably its capacity - confirm)
 *
 * Prints the elapsed clock() time in seconds plus the compressed and restored sizes.
 * NOTE(review): the braces and the return statement are not visible in this
 * listing; callers compare the result with the original length, so presumably
 * `ret` is returned - confirm.
 */
26 /* in - compressed data | out - output */
27 size_t wrapper_decompress(char *in, size_t size_in, char *out, size_t size_out)
29 clock_t begin = clock();
30 size_t ret = fastlz_decompress(in, size_in, out, size_out);
31 clock_t end = clock();
// clock() ticks converted to seconds.
32 double timer = (double)(end - begin) / CLOCKS_PER_SEC;
33 printf("Decomp time spent: %lf, size:%zu, size_decompressed:%zu\n", timer, size_in, ret);
/*
 * Overwrites a single character of `str`, at a rand()-chosen index in
 * [begin, end), with 'a' plus a rand()-chosen offset in 0..25.
 *
 * Requires end > begin: `end - begin` is used as a modulus, so an empty range
 * would divide by zero (and end < begin would wrap, both being size_t).
 */
37 void modify_one_letter(char *str, size_t begin, size_t end)
39 str[begin + rand() % (end - begin)] = 'a' + rand() % 26;
/*
 * Builds a zero-initialised buffer of `calloc_size` bytes and fills its start
 * with back-to-back copies of `source`.
 *
 * calloc_size   - bytes to allocate (must be >= copy_size)
 * copy_size     - budget for the copies; copy_size / strlen(source) whole
 *                 copies are written, any remainder stays zero
 * source        - NUL-terminated string to repeat; must be non-empty, because
 *                 its length is used as a divisor
 * apply_fuzzing - NOTE(review): not referenced on any visible line; presumably
 *                 it guards the modify_one_letter() call in the loop - confirm
 *
 * NOTE(review): the return statement is not visible in this listing; callers
 * use the result as the buffer, so presumably `result` is returned and the
 * caller owns it - confirm.
 * NOTE(review): callers run strlen()/strcat() on the result. That relies on at
 * least one zero byte remaining after the copies, which is not the case when
 * calloc_size == copy_size and copy_size is an exact multiple of strlen(source).
 */
42 char *copy_string(size_t calloc_size, size_t copy_size, const char *source, bool apply_fuzzing)
44 const size_t source_len = strlen(source);
47 assert(copy_size > 0);
48 assert(calloc_size > 0);
49 assert(calloc_size >= copy_size);
// Number of whole copies that fit in copy_size (size_t quotient narrowed to int).
51 int repeats_num = copy_size / source_len;
52 assert(repeats_num > 0);
54 char *result = calloc(1, calloc_size);
57 for (int i = 0; i < repeats_num; ++i) {
// Place the i-th copy directly after the previous one; no NUL is copied.
58 memcpy(result + i * source_len, source, source_len);
// Change one character inside the copy that was just written.
60 modify_one_letter(result, i * source_len, (i + 1) * source_len);
/*
 * Round-trip test on highly repetitive text: repeated copies of
 * duplicate_string followed by random_string and one more duplicate_string.
 * Compresses the text, decompresses it, and asserts the restored bytes match.
 *
 * size - allocation size of the input buffer; must be at least `extras`.
 */
66 void test_duplicate_string(size_t size)
68 /* room for random_string, one closing duplicate_string and '\0' */
// Note: in C a character constant has type int, so sizeof '\0' is sizeof(int),
// not 1; this only over-reserves a few bytes.
69 const size_t extras = sizeof '\0' + strlen(random_string) + strlen(duplicate_string);
70 assert(size >= extras);
71 size_t temp_size = size;
// NOTE(review): no visible line reduces temp_size by `extras` (part of this
// listing is missing). If it really stays equal to `size`, the strcat() calls
// below can write past the `size`-byte buffer - confirm against the full file.
73 char *in = copy_string(size, temp_size, duplicate_string, false);
77 strcat(in, random_string);
78 strcat(in, duplicate_string);
// Compress only the text itself, without the terminating NUL.
80 size_t src_len = strlen(in);
81 char *out = malloc(FASTLZ_NEEDED_OUTPUT_SIZE(src_len));
82 char *re = malloc(src_len);
87 size_t compressed_size = wrapper_compress(in, src_len, out);
88 assert(compressed_size);
90 size_t restored_size = wrapper_decompress(out, compressed_size, re, src_len);
91 assert(restored_size == src_len);
// Byte-by-byte comparison of the restored data with the original.
92 for (size_t i = 0; i < src_len; ++i)
93 assert(re[i] == in[i]);
/*
 * Round-trip test on `size` zero bytes: compresses them, checks the result is
 * non-empty and under 1024 bytes, then decompresses and compares byte by byte.
 */
100 void test_zeroes(size_t size)
// calloc() gives the all-zero input.
102 char *in = calloc(size, 1);
103 char *out = malloc(FASTLZ_NEEDED_OUTPUT_SIZE(size));
104 char *re = malloc(size);
110 size_t compressed_size = wrapper_compress(in, size, out);
111 assert(compressed_size);
112 assert(compressed_size < 1024); // literally just zeroes, shouldn't take more than 1kB ever
114 size_t restored_size = wrapper_decompress(out, compressed_size, re, size);
115 assert(restored_size == size);
116 for (size_t i = 0; i < size; ++i)
117 assert(re[i] == in[i]);
/*
 * Round-trip test on `size` bytes of random 0/1-valued data (see the comment
 * in the body): checks that the compressed size stays below 52% of the input,
 * then decompresses and compares byte by byte.
 */
124 void test_01(size_t size)
126 char *in = malloc(size);
127 char *out = malloc(FASTLZ_NEEDED_OUTPUT_SIZE(size));
128 char *re = malloc(size);
134 /* Produce something like 010100101000111...
135 * Note that the algorithm works at the level
136 * of bytes, not bits, so these being mostly
137 * composed of zero-bits doesn't matter much. */
// NOTE(review): the statement that fills in[i] is not visible in this listing.
138 for (size_t i = 0; i < size; ++i)
141 size_t compressed_size = wrapper_compress(in, size, out);
142 assert(compressed_size);
143 assert(compressed_size < size * 0.52); // about half, plus a bit of overhead
145 size_t restored_size = wrapper_decompress(out, compressed_size, re, size);
146 assert(restored_size == size);
147 for (size_t i = 0; i < size; ++i)
148 assert(re[i] == in[i]);
/*
 * Round-trip test on `size` bytes of random letters picked with per-letter
 * weights from the `letters` table. Checks that the compressed size is below
 * 85% of the input plus 512 bytes, then decompresses and compares byte by byte.
 *
 * NOTE(review): the fields of `struct letter`, the table entries, and the
 * declarations of `total` and `j` are not visible in this listing.
 */
155 void test_alpha_weighted(size_t size)
157 char *in = malloc(size);
158 char *out = malloc(FASTLZ_NEEDED_OUTPUT_SIZE(size));
159 char *re = malloc(size);
165 /* Produce a random string with letters weighted
166 * by their ~actual English frequencies. */
167 static const struct letter {
// Sum all weights so that a draw in [0, total) can be mapped onto a letter.
198 for (size_t i = 0; i < NELEMS(letters); ++i)
199 total += letters[i].freq;
201 for (size_t i = 0; i < size; ++i) {
202 int rnd = rand() % total;
// Subtract weights until the draw falls within the current letter's share.
// NOTE(review): with `>` a draw equal to the weight stays on the current
// letter, which shifts the distribution by one per letter; `>=` looks like
// the intended comparison - confirm. The step that advances `j` is not
// visible in this listing.
204 while (rnd > letters[j].freq) {
205 rnd -= letters[j].freq;
208 in[i] = letters[j].letter;
211 size_t compressed_size = wrapper_compress(in, size, out);
212 assert(compressed_size);
214 /* Surprisingly, fastlz can't really use the fact that the data is shaped like this.
215 * Be happy if we get, say, 85% plus some overhead. */
216 assert(compressed_size < (size_t) (size * 0.85) + 512);
218 size_t restored_size = wrapper_decompress(out, compressed_size, re, size);
219 assert(restored_size == size);
220 for (size_t i = 0; i < size; ++i)
221 assert(re[i] == in[i]);
/*
 * Round-trip test on `size` bytes of uniformly random letters. Checks that the
 * compressed size is below 85% of the input plus 512 bytes, then decompresses
 * and compares byte by byte.
 */
228 void test_alpha(size_t size)
230 char *in = malloc(size);
231 char *out = malloc(FASTLZ_NEEDED_OUTPUT_SIZE(size));
232 char *re = malloc(size);
// Each byte is 'a' plus an offset in 0..19, i.e. one of 20 distinct values.
238 for (size_t i = 0; i < size; ++i)
239 in[i] = 'a' + (rand() % 20);
241 size_t compressed_size = wrapper_compress(in, size, out);
242 assert(compressed_size);
244 /* Very random data, not too compressible.
245 * Be happy if we get, say, 85% plus some overhead. */
246 assert(compressed_size < (size_t)(size * 0.85) + 512);
248 size_t restored_size = wrapper_decompress(out, compressed_size, re, size);
249 assert(restored_size == size);
250 for (size_t i = 0; i < size; ++i)
251 assert(re[i] == in[i]);
/*
 * Round-trip test on repeated copies of fuzzable_string built by copy_string()
 * with fuzzing requested: compresses the text, decompresses it, and asserts
 * the restored bytes match the original.
 */
258 void test_fuzzy_string(size_t size)
260 char *in = copy_string(size, size, fuzzable_string, true);
// Length of the generated text. This relies on a zero byte remaining after the
// copies, i.e. on `size` not being an exact multiple of strlen(fuzzable_string).
262 size_t in_len = strlen(in);
264 char *out = malloc(FASTLZ_NEEDED_OUTPUT_SIZE(in_len));
265 char *re = malloc(in_len);
270 size_t compressed_size = wrapper_compress(in, in_len, out);
271 assert(compressed_size);
273 size_t restored_size = wrapper_decompress(out, compressed_size, re, in_len);
274 assert(restored_size == in_len);
275 for (size_t i = 0; i < in_len; ++i)
276 assert(re[i] == in[i]);
286 /* NB: The following test fails when ASAN is enabled, with a supposed memory
287 * violation in fastlz. This does not spark joy and we should probably fix this.
288 * However, we have more urgent things to do right now, so let's disable
289 * the test on ASAN for now. */
292 #endif /* ASAN_BUILD */
// Input sizes, in bytes, that each test is run with.
294 static const int sizes[] = {
295 4096, // a single log
296 65530, // just below the magic threshold which changes underlying algo
297 65540, // just above it
// Run the tests once per size.
// NOTE(review): the enclosing function header and some of the calls inside the
// loop are not visible in this listing.
301 for (size_t i = 0; i < NELEMS(sizes); ++i) {
302 const int size = sizes[i];
305 test_alpha_weighted(size);
307 test_duplicate_string(size);
308 test_fuzzy_string(size);