8 #define BLOCK_SIZE 65536
11 The format is tailored for fast decompression (i.e. only byte based),
12 and skewed to ASCII content (highest bit often not set):
15 - self-describing ASCII character hex L
16 b 100lllll <l+1 bytes>
17 - literal run of length l+1
19 - back ref of length l+2, at offset -(o+1) (o < 1 << 10)
21 - back ref of length l+2+8, at offset -(o+1) (o < 1 << 8)
23 - back ref of length l+2, at offset -(o+1) (o < 1 << 16)
24 f 1111llll <8o> <8o> <8l>
25 - back ref, length l+18 (l < 1<<12), offset -(o+1) (o < 1<<16)
27 Generally for a literal of length L we need L+1 bytes, hence it is
28 better to encode also very short backrefs (2 chars) as backrefs if
29 their offset is small, as that only needs two bytes. Except if we
30 already have a literal run, in that case it's better to append there,
31 instead of breaking it for a backref. So given a potential backref
32 at offset O, length L the strategy is as follows:
34 L < 2 : encode as 1-literal
35 L == 2, O > 1024 : encode as 1-literal
36 L == 2, have already literals: encode as 1-literal
38 L >= 2, L <= 9, O < 1024 : encode as c
39 L >= 10, L <= 41, O < 256 : encode as d
40 else we have either O >= 1024, or L >= 42:
41 L >= 2, L <= 17, O < 65536 : encode as e
42 L >= 18, L <= 4095+18, O < 65536 : encode as f
43 else we have either L >= 4096+18 or O >= 65536.
44 O >= 65536: encode as 1-literal, too bad
45 (with the current block size this can't happen)
46 L >= 4096+18, so reduce to 4095+18 : encode as f
51 compress_buf (const unsigned char *in, unsigned int in_len,
52 unsigned char *out, unsigned int out_len)
54 unsigned int oo = 0; //out-offset
55 unsigned int io = 0; //in-offset
57 unsigned short htab[HS];
58 unsigned short hnext[BLOCK_SIZE];
59 memset (htab, 0, sizeof (htab));
60 memset (hnext, 0, sizeof (hnext));
61 unsigned int litofs = 0;
62 while (io + 2 < in_len)
64 /* Search for a match of the string starting at IN, we have at
65 least three characters. */
66 unsigned int hval = in[io] | in[io + 1] << 8 | in[io + 2] << 16;
67 unsigned int try, mlen, mofs, tries;
68 hval = (hval ^ (hval << 5) ^ (hval >> 5)) - hval * 5;
69 hval = hval & (HS - 1);
71 hnext[io] = htab[hval];
75 for (tries = 0; tries < 4; tries++)
77 unsigned int this_len, this_ofs;
83 io + this_len < in_len
84 && in[try + this_len] == in[io + this_len]; this_len++)
86 this_ofs = (io - try) - 1;
89 else if (this_len == 2 && (litofs || ((io - try) - 1) >= 1024))
91 else if (this_len >= 4096 + 18)
93 else if (this_ofs >= 65536)
95 if (this_len > mlen || (this_len == mlen && this_ofs < mofs))
96 mlen = this_len, mofs = this_ofs;
111 unsigned litlen = io - litofs;
112 //fprintf (stderr, "lit: %d\n", litlen);
115 if (litlen == 1 && in[litofs] < 0x80)
117 if (oo + 1 >= out_len)
119 out[oo++] = in[litofs++];
122 else if (litlen == 2 && in[litofs] < 0x80
123 && in[litofs + 1] < 0x80)
125 if (oo + 2 >= out_len)
127 out[oo++] = in[litofs++];
128 out[oo++] = in[litofs++];
131 else if (litlen <= 32)
133 if (oo + 1 + litlen >= out_len)
135 out[oo++] = 0x80 | (litlen - 1);
137 out[oo++] = in[litofs++];
142 /* Literal length > 32, so chunk it. */
143 if (oo + 1 + 32 >= out_len)
145 out[oo++] = 0x80 | 31;
146 memcpy (out + oo, in + litofs, 32);
155 //fprintf (stderr, "ref: %d @ %d\n", mlen, mofs);
157 if (mlen >= 2 && mlen <= 9 && mofs < 1024)
159 if (oo + 2 >= out_len)
161 out[oo++] = 0xa0 | ((mofs & 0x300) >> 5) | (mlen - 2);
162 out[oo++] = mofs & 0xff;
164 else if (mlen >= 10 && mlen <= 41 && mofs < 256)
166 if (oo + 2 >= out_len)
168 out[oo++] = 0xc0 | (mlen - 10);
171 else if (mlen >= 2 && mlen <= 17)
173 assert (mofs < 65536);
174 if (oo + 3 >= out_len)
176 out[oo++] = 0xe0 | (mlen - 2);
177 out[oo++] = mofs >> 8;
178 out[oo++] = mofs & 0xff;
182 assert (mlen >= 18 && mlen <= 4095 + 18 && mofs < 65536);
183 if (oo + 4 >= out_len)
185 out[oo++] = 0xf0 | ((mlen - 18) >> 8);
186 out[oo++] = mofs >> 8;
187 out[oo++] = mofs & 0xff;
188 out[oo++] = (mlen - 18) & 0xff;
190 /* Insert the hashes for the compressed run [io..io+mlen-1].
191 For [io] we have it already done at the start of the loop.
192 So it's from [io+1..io+mlen-1], and we need three chars per
193 hash, so the accessed characters will be [io+1..io+mlen-1+2],
194 ergo io+mlen+1 < in_len. */
202 in[io] | in[io + 1] << 8 | in[io + 2] << 16;
203 hval = (hval ^ (hval << 5) ^ (hval >> 5)) - hval * 5;
204 hval = hval & (HS - 1);
205 hnext[io] = htab[hval];
212 /* We might have some characters left. */
213 if (io < in_len && !litofs)
219 unsigned litlen = io - litofs;
220 //fprintf (stderr, "lit: %d\n", litlen);
223 if (litlen == 1 && in[litofs] < 0x80)
225 if (oo + 1 >= out_len)
227 out[oo++] = in[litofs++];
230 else if (litlen == 2 && in[litofs] < 0x80 && in[litofs + 1] < 0x80)
232 if (oo + 2 >= out_len)
234 out[oo++] = in[litofs++];
235 out[oo++] = in[litofs++];
238 else if (litlen <= 32)
240 if (oo + 1 + litlen >= out_len)
242 out[oo++] = 0x80 | (litlen - 1);
244 out[oo++] = in[litofs++];
249 /* Literal length > 32, so chunk it. */
250 if (oo + 1 + 32 >= out_len)
252 out[oo++] = 0x80 | 31;
253 memcpy (out + oo, in + litofs, 32);
265 unchecked_decompress_buf (const unsigned char *in, unsigned int in_len,
267 unsigned int out_len __attribute__((unused)))
269 unsigned char *orig_out = out;
270 const unsigned char *in_end = in + in_len;
273 unsigned int first = *in++;
282 //fprintf (stderr, "lit: 1\n");
286 //b 100lllll <l+1 bytes>
288 unsigned int l = (first & 31) + 1;
289 //fprintf (stderr, "lit: %d\n", l);
297 o = first & (3 << 3);
298 o = (o << 5) | *in++;
299 first = (first & 7) + 2;
306 first = (first & 31) + 10;
310 //e 1110llll <8o> <8o>
311 //f 1111llll <8o> <8o> <8l>
317 first = (((first - 16) << 8) | *in++) + 18;
323 //fprintf (stderr, "ref: %d @ %d\n", first, o);
325 /* We know that first will not be zero, and this loop structure is
326 better optimizable. */
334 return out - orig_out;
340 transfer_file (FILE * from, FILE * to, int compress)
342 unsigned char inb[BLOCK_SIZE];
343 unsigned char outb[BLOCK_SIZE];
344 while (!feof (from) && !ferror (from))
346 unsigned int in_len, out_len;
349 in_len = fread (inb, 1, BLOCK_SIZE, from);
352 unsigned char *b = outb;
353 out_len = compress_buf (inb, in_len, outb, sizeof (outb));
355 b = inb, out_len = in_len;
356 if (fwrite (&out_len, sizeof (out_len), 1, to) != 1)
358 perror ("write size");
361 if (fwrite (b, out_len, 1, to) != 1)
363 perror ("write data");
370 if (fread (&in_len, sizeof (in_len), 1, from) != 1)
374 perror ("can't read size");
377 if (fread (inb, in_len, 1, from) != 1)
379 perror ("can't read data");
383 unchecked_decompress_buf (inb, in_len, outb, sizeof (outb));
384 if (fwrite (outb, out_len, 1, to) != 1)
386 perror ("can't write output");
393 /* Just for benchmarking purposes. */
395 dumb_memcpy (void *dest, const void *src, unsigned int len)
404 benchmark (FILE * from)
406 unsigned char inb[BLOCK_SIZE];
407 unsigned char outb[BLOCK_SIZE];
408 unsigned int in_len = fread (inb, 1, BLOCK_SIZE, from);
409 unsigned int out_len;
412 perror ("can't read from input");
416 unsigned int calib_loop;
417 unsigned int per_loop;
424 while ((clock () - start) < CLOCKS_PER_SEC / 4)
427 for (i = 0; i < calib_loop; i++)
428 dumb_memcpy (outb, inb, in_len);
429 per_loop += calib_loop;
432 fprintf (stderr, "memcpy:\nCalibrated to %d iterations per loop\n",
436 for (i = 0; i < 10; i++)
437 for (j = 0; j < per_loop; j++)
438 dumb_memcpy (outb, inb, in_len);
440 float seconds = (end - start) / (float) CLOCKS_PER_SEC;
441 fprintf (stderr, "%.2f seconds == %.2f MB/s\n", seconds,
442 ((long long) in_len * per_loop * 10) / (1024 * 1024 * seconds));
447 while ((clock () - start) < CLOCKS_PER_SEC / 4)
450 for (i = 0; i < calib_loop; i++)
451 compress_buf (inb, in_len, outb, sizeof (outb));
452 per_loop += calib_loop;
455 fprintf (stderr, "compression:\nCalibrated to %d iterations per loop\n",
459 for (i = 0; i < 10; i++)
460 for (j = 0; j < per_loop; j++)
461 compress_buf (inb, in_len, outb, sizeof (outb));
463 seconds = (end - start) / (float) CLOCKS_PER_SEC;
464 fprintf (stderr, "%.2f seconds == %.2f MB/s\n", seconds,
465 ((long long) in_len * per_loop * 10) / (1024 * 1024 * seconds));
467 out_len = compress_buf (inb, in_len, outb, sizeof (outb));
472 while ((clock () - start) < CLOCKS_PER_SEC / 4)
475 for (i = 0; i < calib_loop; i++)
476 unchecked_decompress_buf (outb, out_len, inb, sizeof (inb));
477 per_loop += calib_loop;
480 fprintf (stderr, "decompression:\nCalibrated to %d iterations per loop\n",
484 for (i = 0; i < 10; i++)
485 for (j = 0; j < per_loop; j++)
486 unchecked_decompress_buf (outb, out_len, inb, sizeof (inb));
488 seconds = (end - start) / (float) CLOCKS_PER_SEC;
489 fprintf (stderr, "%.2f seconds == %.2f MB/s\n", seconds,
490 ((long long) in_len * per_loop * 10) / (1024 * 1024 * seconds));
494 main (int argc, char *argv[])
497 if (argc > 1 && !strcmp (argv[1], "-d"))
499 if (argc > 1 && !strcmp (argv[1], "-b"))
502 transfer_file (stdin, stdout, compress);