1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * 842 Software Compression
5 * Copyright (C) 2015 Dan Streetman, IBM Corp
7 * See 842.h for details of the 842 compressed format.
10 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11 #define MODULE_NAME "842_compress"
13 #include <linux/hashtable.h>
16 #include "842_debugfs.h"
/* Bit widths handed to DECLARE_HASHTABLE() for the 8-, 4- and 2-byte tables. */
18 #define SW842_HASHTABLE8_BITS (10)
19 #define SW842_HASHTABLE4_BITS (11)
20 #define SW842_HASHTABLE2_BITS (10)
22 /* By default, we allow compressing input buffers of any length, but we must
23 * use the non-standard "short data" template so the decompressor can correctly
24 * reproduce the uncompressed data buffer at the right length. However, the
25 * hardware 842 decompressor will not recognize the "short data" template, and
26 * will fail to decompress any compressed buffer containing it (I have no idea
27 * why anyone would want to use software to compress and hardware to decompress,
28 * but that's beside the point). This parameter forces the compression
29 * function to simply reject any input buffer that isn't a multiple of 8 bytes
30 * long, instead of using the "short data" template, so that all compressed
31 * buffers produced by this function will be decompressible by the 842 hardware
32 * decompressor. Unless you have a specific need for that, leave this disabled
33 * so that any length buffer can be compressed.
/* When set, sw842_compress() refuses input whose length is not a multiple of 8. */
35 static bool sw842_strict;
/* Exposed as the module parameter named "strict", with permissions 0644. */
36 module_param_named(strict, sw842_strict, bool, 0644);
/*
 * Table of compression templates, one per row: four operation codes
 * followed by a fifth byte. add_template() emits entry 4 of its row as the
 * template code and walks entries 0-3 as operations (presumably its local
 * 't' points at a row of this table -- TODO confirm). The trailing comment
 * on each row gives the size in bits of that template's parameters, and the
 * rows are listed in increasing order of that size. process_next() tries
 * the rows in order and treats the last one as the fallback.
 */
38 static u8 comp_ops[OPS_MAX][5] = { /* params size in bits */
39 { I8, N0, N0, N0, 0x19 }, /* 8 */
40 { I4, I4, N0, N0, 0x18 }, /* 18 */
41 { I4, I2, I2, N0, 0x17 }, /* 25 */
42 { I2, I2, I4, N0, 0x13 }, /* 25 */
43 { I2, I2, I2, I2, 0x12 }, /* 32 */
44 { I4, I2, D2, N0, 0x16 }, /* 33 */
45 { I4, D2, I2, N0, 0x15 }, /* 33 */
46 { I2, D2, I4, N0, 0x0e }, /* 33 */
47 { D2, I2, I4, N0, 0x09 }, /* 33 */
48 { I2, I2, I2, D2, 0x11 }, /* 40 */
49 { I2, I2, D2, I2, 0x10 }, /* 40 */
50 { I2, D2, I2, I2, 0x0d }, /* 40 */
51 { D2, I2, I2, I2, 0x08 }, /* 40 */
52 { I4, D4, N0, N0, 0x14 }, /* 41 */
53 { D4, I4, N0, N0, 0x04 }, /* 41 */
54 { I2, I2, D4, N0, 0x0f }, /* 48 */
55 { I2, D2, I2, D2, 0x0c }, /* 48 */
56 { I2, D4, I2, N0, 0x0b }, /* 48 */
57 { D2, I2, I2, D2, 0x07 }, /* 48 */
58 { D2, I2, D2, I2, 0x06 }, /* 48 */
59 { D4, I2, I2, N0, 0x03 }, /* 48 */
60 { I2, D2, D4, N0, 0x0a }, /* 56 */
61 { D2, I2, D4, N0, 0x05 }, /* 56 */
62 { D4, I2, D2, N0, 0x02 }, /* 56 */
63 { D4, D2, I2, N0, 0x01 }, /* 56 */
64 { D8, N0, N0, N0, 0x00 }, /* 64 */
/* Hash list entry for the 8-byte table (element type of node8[]). */
67 struct sw842_hlist_node8 {
/* Linkage into the hash table bucket list. */
68 struct hlist_node node;
/* Hash list entry for the 4-byte table (element type of node4[]). */
73 struct sw842_hlist_node4 {
/* Linkage into the hash table bucket list. */
74 struct hlist_node node;
/* Hash list entry for the 2-byte table (element type of node2[]). */
79 struct sw842_hlist_node2 {
/* Linkage into the hash table bucket list. */
80 struct hlist_node node;
/*
 * Sentinels stored in the index2[]/index4[]/index8[] slots. Any value >= 0
 * counts as a successful lookup.
 */
/* A lookup was performed by find_index() and no matching entry was found. */
85 #define INDEX_NOT_FOUND (-1)
/* No lookup performed yet; check_index() runs find_index() on this value. */
86 #define INDEX_NOT_CHECKED (-2)
/* One hash table per data width (8, 4 and 2 bytes). */
101 DECLARE_HASHTABLE(htable8, SW842_HASHTABLE8_BITS);
102 DECLARE_HASHTABLE(htable4, SW842_HASHTABLE4_BITS);
103 DECLARE_HASHTABLE(htable2, SW842_HASHTABLE2_BITS);
/* Storage for the hash entries: 2^I8_BITS, 2^I4_BITS and 2^I2_BITS slots. */
104 struct sw842_hlist_node8 node8[1 << I8_BITS];
105 struct sw842_hlist_node4 node4[1 << I4_BITS];
106 struct sw842_hlist_node2 node2[1 << I2_BITS];
/*
 * Read a 'b'-bit big-endian value from the input at byte offset 'o' from
 * (p)->in, using an unaligned load, and convert it to CPU byte order.
 */
109 #define get_input_data(p, o, b) \
110 be##b##_to_cpu(get_unaligned((__be##b *)((p)->in + (o))))
/*
 * Initialise the b-byte hash table of 'p' and reset every element of the
 * matching node array: 'index' is set to the element's array position,
 * 'data' to 0, and the hlist linkage is initialised.
 */
112 #define init_hashtable_nodes(p, b) do { \
114 hash_init((p)->htable##b); \
115 for (_i = 0; _i < ARRAY_SIZE((p)->node##b); _i++) { \
116 (p)->node##b[_i].index = _i; \
117 (p)->node##b[_i].data = 0; \
118 INIT_HLIST_NODE(&(p)->node##b[_i].node); \
/*
 * Look up the b-byte value p->data<b>[n] in the b-byte hash table.
 * p->index<b>[n] is first set to INDEX_NOT_FOUND, then overwritten with the
 * 'index' of an entry whose stored 'data' equals the value.
 * Evaluates to true when p->index<b>[n] ends up >= 0.
 */
122 #define find_index(p, b, n) ({ \
123 struct sw842_hlist_node##b *_n; \
124 p->index##b[n] = INDEX_NOT_FOUND; \
125 hash_for_each_possible(p->htable##b, _n, node, p->data##b[n]) { \
126 if (p->data##b[n] == _n->data) { \
127 p->index##b[n] = _n->index; \
131 p->index##b[n] >= 0; \
/*
 * Test whether p->index<b>[n] holds a match (>= 0). If the slot is still
 * INDEX_NOT_CHECKED the lookup is done now through find_index(); otherwise
 * the stored result is reused.
 */
134 #define check_index(p, b, n) \
135 ((p)->index##b[n] == INDEX_NOT_CHECKED \
136 ? find_index(p, b, n) \
137 : (p)->index##b[n] >= 0)
/*
 * Re-key the hash entry node<b>[i + d]: unlink it from the table, store
 * data<b>[d] in it, log the change with pr_debug(), and add it back to the
 * b-byte table hashed on the new data.
 */
139 #define replace_hash(p, b, i, d) do { \
140 struct sw842_hlist_node##b *_n = &(p)->node##b[(i)+(d)]; \
141 hash_del(&_n->node); \
142 _n->data = (p)->data##b[d]; \
143 pr_debug("add hash index%x %x pos %x data %lx\n", b, \
144 (unsigned int)_n->index, \
145 (unsigned int)((p)->in - (p)->instart), \
146 (unsigned long)_n->data); \
147 hash_add((p)->htable##b, &_n->node, _n->data); \
/* bmask[i] is a byte with its i most significant bits set (i = 0..7). */
150 static u8 bmask[8] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
/* Forward declaration: add_bits() and __split_add_bits() call each other. */
152 static int add_bits(struct sw842_param *p, u64 d, u8 n);
/*
 * Emit the n-bit value 'd' as two add_bits() calls: first the upper
 * (n - s) bits (d shifted right by s), then the low s bits (d masked with
 * GENMASK_ULL(s - 1, 0)).
 */
154 static int __split_add_bits(struct sw842_param *p, u64 d, u8 n, u8 s)
161 ret = add_bits(p, d >> s, n - s);
164 return add_bits(p, d & GENMASK_ULL(s - 1, 0), s);
/*
 * Append the n-bit value 'd' to the output stream described by 'p'.
 *
 * b is the current bit offset (p->bit), bits is that offset plus n, and s
 * is the number of bits needed to round 'bits' up to a whole byte. Writes
 * that cannot be done with one store are handed to __split_add_bits(),
 * also when little output space is left (p->olen < 8 or < 4). The value is
 * then written with an unaligned big-endian 16-, 32- or 64-bit store, and
 * p->out / p->olen are advanced by the number of whole bytes in p->bit.
 */
167 static int add_bits(struct sw842_param *p, u64 d, u8 n)
169 int b = p->bit, bits = b + n, s = round_up(bits, 8) - bits;
173 pr_debug("add %u bits %lx\n", (unsigned char)n, (unsigned long)d);
178 /* split this up if writing to > 8 bytes (i.e. n == 64 && p->bit > 0),
179 * or if we're at the end of the output buffer and would write past end
182 return __split_add_bits(p, d, n, 32);
183 else if (p->olen < 8 && bits > 32 && bits <= 56)
184 return __split_add_bits(p, d, n, 16);
185 else if (p->olen < 4 && bits > 16 && bits <= 24)
186 return __split_add_bits(p, d, n, 8);
188 if (DIV_ROUND_UP(bits, 8) > p->olen)
197 put_unaligned(cpu_to_be16(o << 8 | d), (__be16 *)out);
199 put_unaligned(cpu_to_be32(o << 24 | d << 8), (__be32 *)out);
201 put_unaligned(cpu_to_be32(o << 24 | d), (__be32 *)out);
203 put_unaligned(cpu_to_be64(o << 56 | d << 24), (__be64 *)out);
205 put_unaligned(cpu_to_be64(o << 56 | d << 16), (__be64 *)out);
207 put_unaligned(cpu_to_be64(o << 56 | d << 8), (__be64 *)out);
209 put_unaligned(cpu_to_be64(o << 56 | d), (__be64 *)out);
214 p->out += p->bit / 8;
215 p->olen -= p->bit / 8;
/*
 * Emit template 'c' for the current 8-byte block.
 *
 * The template code t[4] is written first (OP_BITS wide). Then, for each of
 * the four operations t[0..3], either a hash index (I8_BITS / I4_BITS /
 * I2_BITS wide) or the raw data of the matching width is written. 'b' is
 * the byte offset inside the block and grows by each operation's OP_AMOUNT.
 * Inconsistent operations or lengths are reported with pr_err(). When
 * sw842_template_counts is set, the counter for this template is bumped.
 */
222 static int add_template(struct sw842_param *p, u8 c)
231 pr_debug("template %x\n", t[4]);
233 ret = add_bits(p, t[4], OP_BITS);
237 for (i = 0; i < 4; i++) {
238 pr_debug("op %x\n", t[i]);
240 switch (t[i] & OP_AMOUNT) {
244 else if (t[i] & OP_ACTION_INDEX)
245 ret = add_bits(p, p->index8[0], I8_BITS);
246 else if (t[i] & OP_ACTION_DATA)
247 ret = add_bits(p, p->data8[0], 64);
/*
 * data4[] only caches the words at byte offsets 0 and 4, so a 4-byte data
 * operation at offset 2 reads its value straight from the input.
 */
252 if (b == 2 && t[i] & OP_ACTION_DATA)
253 ret = add_bits(p, get_input_data(p, 2, 32), 32);
254 else if (b != 0 && b != 4)
256 else if (t[i] & OP_ACTION_INDEX)
257 ret = add_bits(p, p->index4[b >> 2], I4_BITS);
258 else if (t[i] & OP_ACTION_DATA)
259 ret = add_bits(p, p->data4[b >> 2], 32);
/* 2-byte operations are only valid at even byte offsets 0, 2, 4 and 6. */
264 if (b != 0 && b != 2 && b != 4 && b != 6)
266 if (t[i] & OP_ACTION_INDEX)
267 ret = add_bits(p, p->index2[b >> 1], I2_BITS);
268 else if (t[i] & OP_ACTION_DATA)
269 ret = add_bits(p, p->data2[b >> 1], 16);
/* A zero-amount op is only valid as a NOOP once all 8 bytes are covered. */
274 inv = (b != 8) || !(t[i] & OP_ACTION_NOOP);
285 pr_err("Invalid templ %x op %d : %x %x %x %x\n",
286 c, i, t[0], t[1], t[2], t[3]);
290 b += t[i] & OP_AMOUNT;
294 pr_err("Invalid template %x len %x : %x %x %x %x\n",
295 c, b, t[0], t[1], t[2], t[3]);
299 if (sw842_template_counts)
300 atomic_inc(&template_count[t[4]]);
/*
 * Emit a repeat template: the OP_REPEAT code (OP_BITS wide) followed by the
 * repeat count (REPEAT_BITS wide). 'r' is decremented before being encoded
 * because the field is 0-based; r == 0, or a decremented value above
 * REPEAT_BITS_MAX, fails the range check. When sw842_template_counts is set
 * the repeat counter is bumped.
 */
305 static int add_repeat_template(struct sw842_param *p, u8 r)
309 /* repeat param is 0-based */
310 if (!r || --r > REPEAT_BITS_MAX)
313 ret = add_bits(p, OP_REPEAT, OP_BITS);
317 ret = add_bits(p, r, REPEAT_BITS);
321 if (sw842_template_counts)
322 atomic_inc(&template_repeat_count);
/*
 * Emit a short-data template: the OP_SHORT_DATA code (OP_BITS wide), the
 * byte count 'b' (SHORT_DATA_BITS wide), then the first b input bytes at
 * p->in, 8 bits each. b must be in the range 1..SHORT_DATA_BITS_MAX. When
 * sw842_template_counts is set the short-data counter is bumped.
 */
327 static int add_short_data_template(struct sw842_param *p, u8 b)
331 if (!b || b > SHORT_DATA_BITS_MAX)
334 ret = add_bits(p, OP_SHORT_DATA, OP_BITS);
338 ret = add_bits(p, b, SHORT_DATA_BITS);
342 for (i = 0; i < b; i++) {
343 ret = add_bits(p, p->in[i], 8);
348 if (sw842_template_counts)
349 atomic_inc(&template_short_data_count);
/*
 * Emit the OP_ZEROS template code (OP_BITS wide); it carries no parameters.
 * When sw842_template_counts is set the zeros counter is bumped.
 */
354 static int add_zeros_template(struct sw842_param *p)
356 int ret = add_bits(p, OP_ZEROS, OP_BITS);
361 if (sw842_template_counts)
362 atomic_inc(&template_zeros_count);
/*
 * Emit the OP_END template code (OP_BITS wide); it carries no parameters.
 * When sw842_template_counts is set the end counter is bumped.
 */
367 static int add_end_template(struct sw842_param *p)
369 int ret = add_bits(p, OP_END, OP_BITS);
374 if (sw842_template_counts)
375 atomic_inc(&template_end_count);
/*
 * Check whether template 'c' is usable for the current 8-byte block.
 *
 * For every operation that is an index action, the 2-, 4- or 8-byte value
 * at the current byte offset 'b' is looked up with check_index() and the
 * result stored in 'match'. 'b' grows by each operation's OP_AMOUNT, so
 * b >> 1 and b >> 2 select the 2-byte and 4-byte slot for that offset.
 */
380 static bool check_template(struct sw842_param *p, u8 c)
388 for (i = 0; i < 4; i++) {
389 if (t[i] & OP_ACTION_INDEX) {
390 if (t[i] & OP_AMOUNT_2)
391 match = check_index(p, 2, b >> 1);
392 else if (t[i] & OP_AMOUNT_4)
393 match = check_index(p, 4, b >> 2);
394 else if (t[i] & OP_AMOUNT_8)
395 match = check_index(p, 8, 0);
402 b += t[i] & OP_AMOUNT;
/*
 * Load the 8 input bytes at p->in into the cached views used by the
 * template code: one 64-bit value (data8[0]), two 32-bit values (data4[],
 * byte offsets 0 and 4) and four 16-bit values (data2[], byte offsets 0, 2,
 * 4 and 6). Each is read big-endian and converted to CPU byte order.
 */
408 static void get_next_data(struct sw842_param *p)
410 p->data8[0] = get_input_data(p, 0, 64);
411 p->data4[0] = get_input_data(p, 0, 32);
412 p->data4[1] = get_input_data(p, 4, 32);
413 p->data2[0] = get_input_data(p, 0, 16);
414 p->data2[1] = get_input_data(p, 2, 16);
415 p->data2[2] = get_input_data(p, 4, 16);
416 p->data2[3] = get_input_data(p, 6, 16);
419 /* update the hashtable entries.
420 * only call this after finding/adding the current template
421 * the dataN fields for the current 8 byte block must be already updated
423 static void update_hashtables(struct sw842_param *p)
/* Byte offset of the current block from the start of the input. */
425 u64 pos = p->in - p->instart;
/*
 * Slot numbers of the block in units of 8, 4 and 2 bytes, wrapped to the
 * sizes of node8[], node4[] and node2[].
 */
426 u64 n8 = (pos >> 3) % (1 << I8_BITS);
427 u64 n4 = (pos >> 2) % (1 << I4_BITS);
428 u64 n2 = (pos >> 1) % (1 << I2_BITS);
/* Re-key one 8-byte, two 4-byte and four 2-byte entries for this block. */
430 replace_hash(p, 8, n8, 0);
431 replace_hash(p, 4, n4, 0);
432 replace_hash(p, 4, n4, 1);
433 replace_hash(p, 2, n2, 0);
434 replace_hash(p, 2, n2, 1);
435 replace_hash(p, 2, n2, 2);
436 replace_hash(p, 2, n2, 3);
439 /* find the next template to use, and add it
440 * the p->dataN fields must already be set for the current 8 byte block
442 static int process_next(struct sw842_param *p)
/* Mark every index slot as not looked up, so check_index() searches afresh. */
446 p->index8[0] = INDEX_NOT_CHECKED;
447 p->index4[0] = INDEX_NOT_CHECKED;
448 p->index4[1] = INDEX_NOT_CHECKED;
449 p->index2[0] = INDEX_NOT_CHECKED;
450 p->index2[1] = INDEX_NOT_CHECKED;
451 p->index2[2] = INDEX_NOT_CHECKED;
452 p->index2[3] = INDEX_NOT_CHECKED;
454 /* check up to OPS_MAX - 1; last op is our fallback */
455 for (i = 0; i < OPS_MAX - 1; i++) {
456 if (check_template(p, i))
/* Emit template i (presumably the first match, else the fallback row). */
460 ret = add_template(p, i);
470 * Compress the uncompressed buffer of length @ilen at @in to the output buffer
471 * @out, using no more than @olen bytes, using the 842 compression format.
473 * Returns: 0 on success, error on failure. The @olen parameter
474 * will contain the number of output bytes written on success, or
477 int sw842_compress(const u8 *in, unsigned int ilen,
478 u8 *out, unsigned int *olen, void *wmem)
/* All compressor state lives in the caller-supplied work memory. */
480 struct sw842_param *p = (struct sw842_param *)wmem;
482 u64 last, next, pad, total;
/* Compile-time check that the state fits in SW842_MEM_COMPRESS bytes. */
486 BUILD_BUG_ON(sizeof(*p) > SW842_MEM_COMPRESS);
/* Reset the three hash tables and their node arrays. */
488 init_hashtable_nodes(p, 8);
489 init_hashtable_nodes(p, 4);
490 init_hashtable_nodes(p, 2);
503 /* if using strict mode, we can only compress a multiple of 8 */
504 if (sw842_strict && (ilen % 8)) {
505 pr_err("Using strict mode, can't compress len %d\n", ilen);
509 /* let's compress at least 8 bytes, mkay? */
510 if (unlikely(ilen < 8))
513 /* make initial 'last' different so we don't match the first time */
514 last = ~get_unaligned((u64 *)p->in);
516 while (p->ilen > 7) {
517 next = get_unaligned((u64 *)p->in);
519 /* must get the next data, as we need to update the hashtable
520 * entries with the new data every time
524 /* we don't care about endianness in last or next;
525 * we're just comparing 8 bytes to another 8 bytes,
526 * they're both the same endianness
529 /* repeat count bits are 0-based, so we stop at +1 */
530 if (++repeat_count <= REPEAT_BITS_MAX)
534 ret = add_repeat_template(p, repeat_count);
536 if (next == last) /* reached max repeat bits */
541 ret = add_zeros_template(p);
543 ret = process_next(p);
550 update_hashtables(p);
556 ret = add_repeat_template(p, repeat_count);
/* Leftover input (the loop above only runs while p->ilen > 7). */
563 ret = add_short_data_template(p, p->ilen);
571 ret = add_end_template(p);
576 * crc(0:31) is appended to target data starting with the next
577 * bit after End of stream template.
578 * nx842 calculates CRC for data in big-endian format. So doing
579 * same here so that sw842 decompression can be used for both
/* Big-endian CRC32 with seed 0 over the whole uncompressed input. */
582 crc = crc32_be(0, in, ilen);
583 ret = add_bits(p, crc, CRC_BITS);
593 /* pad compressed length to multiple of 8 */
594 pad = (8 - ((total - p->olen) % 8)) % 8;
596 if (pad > p->olen) /* we were so close! */
598 memset(p->out, 0, pad);
/* The byte count is returned through an unsigned int; check that it fits. */
603 if (unlikely((total - p->olen) > UINT_MAX))
/* Bytes written: total (presumably the initial output space) minus what is left. */
606 *olen = total - p->olen;
610 EXPORT_SYMBOL_GPL(sw842_compress);
/* Module init hook: calls sw842_debugfs_create() if sw842_template_counts is set. */
612 static int __init sw842_init(void)
614 if (sw842_template_counts)
615 sw842_debugfs_create();
619 module_init(sw842_init);
/* Module exit hook: calls sw842_debugfs_remove() if sw842_template_counts is set. */
621 static void __exit sw842_exit(void)
623 if (sw842_template_counts)
624 sw842_debugfs_remove();
626 module_exit(sw842_exit);
628 MODULE_LICENSE("GPL");
629 MODULE_DESCRIPTION("Software 842 Compressor");
630 MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");