3 #include <gsf/gsf-utils.h>
4 #include <gsf/gsf-msole-utils.h>
5 #include <gsf/gsf-input-stdio.h>
6 #include <gsf/gsf-output-stdio.h>
/* Number of input bytes handled per compression block: it sizes the inblock
 * buffer below and caps each read performed by do_compress. */
5 #define VBA_COMPRESSION_WINDOW 4096
12 /* Brute force and ugliness ! */
/* Raw input bytes of the block currently being compressed; the compressor
 * functions in this file reach it as buf->inblock. */
14 guint8 inblock[VBA_COMPRESSION_WINDOW];
/*
 * Make a byte safe to print in the stderr debug dumps.
 *
 * data: the byte to print.
 *
 * Returns data unchanged when it lies in the range 0x20 up to but not
 * including 126, and a dot for every other value.
 *
 * NOTE(review): 126 (tilde) is printable ASCII but is mapped to a dot by the
 * strict less-than test; confirm whether that is intended.
 */
28 byte_to_char (guint8 data)
30 return data >= 0x20 && data < 126 ? data : '.';
/*
 * Choose the shift used to lay out a two-byte copy token for a given
 * position inside the current block.
 *
 * cur_pos: position within the block that the token is emitted for.
 *
 * The result is used in two places: find_match caps match lengths at
 * (1 << shift) - 1, and output_match shifts the match distance left by it.
 * In the ranges handled below the shift falls as cur_pos grows, from 12 for
 * cur_pos <= 0x10 down to 5 for positions above 0x400 and up to 0x800.
 */
34 get_shift (guint cur_pos)
38 if (cur_pos <= 0x80) {
40 shift = (cur_pos <= 0x10) ? 12 : 11;
42 shift = (cur_pos <= 0x40) ? 10 : 9;
45 shift = (cur_pos <= 0x100) ? 8 : 7;
46 else if (cur_pos <= 0x800)
47 shift = (cur_pos <= 0x400) ? 6 : 5;
/*
 * Look for an earlier occurrence of the bytes that start at pos.
 *
 * buf: compression state; buf->inblock holds the input, buf->length its size.
 * pos: position in buf->inblock to find a match for.
 * len: out parameter, receives the match length capped at max_match.
 *
 * Candidate start positions i are tried from pos - 1 downwards. For each
 * candidate, j counts how many bytes agree, bounded both by the bytes left
 * in the block (buf->length - pos) and by pos itself.
 *
 * compress_block treats a non-negative return value as the position of the
 * match and passes it on to output_match.
 *
 * NOTE(review): the i >= 0 loop test can only become false if i has a signed
 * type; confirm its declaration.
 */
56 find_match (CompressBuf *buf, guint pos, guint *len)
/* Longest length worth reporting at this position, derived from the shift
 * that output_match will use for the same position. */
59 guint max_match = (1 << get_shift (pos)) - 1;
61 /* FIXME: the MS impl. does different to a linear search here
62 and is not very good at this either; is happy to get much
63 worse matches; perhaps some single-entry match lookup table ?
64 it seems to ~regularly truncate strings, and get earlier
65 / later matches of equivalent length with no predictability
68 for (i = pos - 1; i >= 0; i--) {
71 for (j = 0; j < buf->length - pos && j < pos; j++)
72 if (buf->inblock[pos + j] != buf->inblock[i + j])
76 *len = MIN (j, max_match);
/*
 * Queue one output item and flush the queue once buf->shift reaches 8.
 *
 * buf:        compression state (mask byte, shift counter, pending bytes in
 *             buf->outstr, destination in buf->output).
 * data:       bytes to queue; two bytes are read for a token, one otherwise.
 * compressed: TRUE when data holds a two-byte copy token.
 *
 * Judging by the callers (output_match passes a two-byte token with TRUE,
 * compress_block passes a pointer to a single input byte with FALSE), the
 * mask update together with the two-byte append is the compressed case and
 * the one-byte append is the literal case.
 */
84 output_data (CompressBuf *buf, guint8 *data, gboolean compressed)
/* Token: set bit number buf->shift in the mask and queue both token bytes. */
87 buf->mask |= 1 << buf->shift;
88 g_string_append_c (buf->outstr, data [0]);
89 g_string_append_c (buf->outstr, data [1]);
/* Literal: queue the single byte. */
91 g_string_append_c (buf->outstr, data [0]);
/* Once buf->shift reaches 8, write the one-byte mask followed by every
 * byte queued so far. */
94 if (buf->shift == 8) {
97 gsf_output_write (buf->output, 1, &buf->mask);
98 gsf_output_write (buf->output, buf->outstr->len, buf->outstr->str);
/* Debug dump on stderr: the mask in hex, then the queued bytes with
 * unprintable ones replaced by byte_to_char. */
101 fprintf (stderr, "Block: 0x%x '", buf->mask);
102 for (i = 0; i < buf->outstr->len; i++)
103 fprintf (stderr, "%c", byte_to_char (buf->outstr->str[i]));
104 fprintf (stderr, "'\n");
/* Empty the queue, keeping the GString itself for reuse. */
109 g_string_set_size (buf->outstr, 0);
/*
 * Encode a back-reference as a two-byte token and queue it via output_data.
 *
 * buf:     compression state, passed through to output_data.
 * cur_pos: position in the block where the match is being emitted.
 * pos:     start position of the earlier matching bytes.
 * len:     match length.
 */
114 output_match (CompressBuf *buf, guint cur_pos, guint pos, guint len)
116 int shift, token, distance;
119 shift = get_shift (cur_pos);
/* The distance is stored biased by one: a match starting immediately
 * before cur_pos encodes as 0. */
121 distance = cur_pos - pos - 1;
123 /* Window size issue !? - get a better match later with a larger window !? */
/* The distance sits above the low shift bits; the length is stored as
 * len - 3.
 * NOTE(review): the length mask is shift + 1 bits wide, one bit more than
 * the distance is shifted by; confirm this is intended. */
125 token = (distance << shift) + ((len - 3) & ((1<<(shift + 1))-1));
/* The token is emitted low byte first. */
126 data[0] = token & 0xff;
127 data[1] = token >> 8;
130 fprintf (stderr, "shift %d, [0x%x(%d) - %d], len %d, distance %d bytes %.2x %.2x\n",
131 shift, cur_pos, cur_pos, pos, len, distance,
/* Diagnostic only: report when the end of the match reaches 1 << shift. */
133 if (cur_pos + len >= (1u<<shift))
134 fprintf (stderr, "Overlaps boundary\n");
137 output_data (buf, data, TRUE);
/*
 * Compress the buf->length bytes currently held in buf->inblock.
 *
 * Walks the block from position 0. Where find_match reports a match
 * (non-negative result) the match is emitted through output_match; single
 * input bytes are emitted as literals through output_data, which advances
 * pos by one.
 */
141 compress_block (CompressBuf *buf)
146 for (pos = 0; pos < buf->length;) {
147 if ((match = find_match (buf, pos, &len)) >= 0) {
148 output_match (buf, pos, match, len);
151 output_data (buf, &(buf->inblock[pos++]), FALSE);
/*
 * Compress everything remaining in input and write the result to output.
 *
 * input:  source of uncompressed bytes.
 * output: destination; it is written sequentially and then sought back to
 *         offset 0 so that the three-byte header can be filled in.
 *
 * The header is written twice: once as a placeholder before any data, and
 * once at the end when the length is known. The low eight bits of the length
 * go into data[1] and the remaining high bits are OR-ed into data[2].
 */
156 do_compress (GsfInput *input, GsfOutput *output)
158 CompressBuf real_buf, *buf;
160 guint8 data[HEADER_SIZE];
/* Start from an all-zero state, then attach the destination and an empty
 * queue for pending output bytes. */
164 memset (buf, 0, sizeof (CompressBuf));
165 buf->output = output;
166 buf->outstr = g_string_sized_new (20);
/* Reserve three bytes for the header; they are overwritten at the end. */
171 gsf_output_write (buf->output, 3, data); /* dummy padding */
/* NOTE(review): no use or release of string is visible after this
 * allocation; confirm it is needed and that it is freed. */
173 string = g_string_sized_new (64);
/* Feed the input through in blocks of at most VBA_COMPRESSION_WINDOW
 * bytes. A failed read is reported through g_error. */
175 while (gsf_input_remaining (input) > 0) {
176 buf->length = MIN (gsf_input_remaining (input), VBA_COMPRESSION_WINDOW);
177 if (!gsf_input_read (input, buf->length, buf->inblock))
178 g_error ("Failed to read %d bytes\n", buf->length);
179 compress_block (buf);
/* Flush whatever is still queued: mask byte first, then the bytes. */
182 if (buf->outstr->len) {
183 gsf_output_write (buf->output, 1, &buf->mask);
184 gsf_output_write (buf->output, buf->outstr->len, buf->outstr->str);
/* The recorded length leaves out the three header bytes and is stored
 * minus one; do_decompress checks comp_len + 1 against the input size
 * less 3. */
187 length = gsf_output_size (buf->output) - 3 - 1;
188 if (length > 0x0c0c) /* TESTME: is this really right ? */
190 data[1] = length & 0xff;
191 data[2] |= (length >> 8);
/* Go back to the start and replace the placeholder header. */
192 gsf_output_seek (output, 0, G_SEEK_SET);
193 gsf_output_write (buf->output, 3, data); /* real data */
/*
 * Decompress input and write the inflated bytes to output.
 *
 * input:  compressed stream, starting with a HEADER_SIZE byte header.
 * output: destination for the decompressed bytes.
 *
 * Header anomalies are reported on stderr but do not stop processing. Read
 * and write failures are accumulated in err.
 */
197 do_decompress (GsfInput *input, GsfOutput *output)
199 gboolean err = FALSE;
200 guint8 data[HEADER_SIZE];
201 GByteArray *decompressed;
/* NOTE(review): the header bytes are inspected below even when this read
 * failed, in which case data has not been filled in. */
204 err |= !gsf_input_read (input, HEADER_SIZE, data);
/* Expected header shape: first byte 0x01, high nibble of the third byte
 * 0xb. Deviations only produce a message. */
205 if (data [0] != 0x01)
206 fprintf (stderr, "Odd pre-amble byte 0x%x\n", data[0]);
207 if ((data [2] & 0xf0) != 0xb0)
208 fprintf (stderr, "Odd high nibble 0x%x\n", (data[2] & 0xf0));
/* Twelve-bit stored length: the low nibble of data[2] supplies the high
 * four bits, data[1] the low eight. */
209 comp_len = ((data[2] & 0x0f) << 8) + data[1];
/* The stored value is expected to be one less than the input size
 * without its first three bytes. */
210 if (comp_len + 1 != gsf_input_size (input) - 3)
211 fprintf (stderr, "Size mismatch %d %d\n",
212 comp_len + 1, (int) (gsf_input_size (input) - 3));
/* Inflate through gsf_msole_inflate; the 3 is presumably the offset just
 * past the header (TODO confirm against the libgsf documentation). */
214 decompressed = gsf_msole_inflate (input, 3);
216 fprintf (stderr, "Failed to decompress\n");
/* NOTE(review): confirm decompressed cannot be NULL when execution gets
 * here, since it is dereferenced next.
 * g_byte_array_free with FALSE releases only the array wrapper and returns
 * the byte buffer, which is handed straight to the write below.
 * NOTE(review): the returned buffer is not kept anywhere, so it cannot be
 * released with g_free afterwards; this looks like a leak. */
218 size = decompressed->len;
219 err |= !gsf_output_write (output, size,
220 g_byte_array_free (decompressed, FALSE));
223 fprintf (stderr, "I/O error\n");
/*
 * Dump the records of a dir stream to stderr in human-readable form.
 *
 * input: stream positioned at the first record.
 *
 * Records are read until the input is exhausted or a read fails. Each record
 * is printed as one line holding the stream position, a running record
 * count, the op code, the payload length and the payload rendered as text,
 * as an offset, or as a hex dump with a printable rendering.
 */
229 decode_dir (GsfInput *input)
231 gboolean err = FALSE;
235 while (gsf_input_remaining (input) && !err) {
/* Per-record flags selecting how the payload is rendered further down. */
239 gboolean ascii = FALSE;
240 gboolean unicode = FALSE;
241 gboolean offset = FALSE;
/* Six-byte record header: a 16-bit little-endian op code followed by a
 * 32-bit little-endian payload length. */
243 err |= !gsf_input_read (input, 6, data);
245 op = GSF_LE_GET_GUINT16 (&data[0]);
246 length = GSF_LE_GET_GUINT32 (&data[2]);
249 fprintf (stderr, "** Quirk fix **\n");
253 /* Special nasties / up-stream bugs */
273 fprintf (stderr, "0x%.6x Op %3d 0x%.2x, length %3d: '",
274 (int)gsf_input_tell (input), op_count, op, length);
/* A length that runs past the end of the input is reported and clamped to
 * at most 64 bytes, or to what is left if that is less. */
276 if (length > gsf_input_remaining (input)) {
277 fprintf (stderr, "Broken - foo !\n");
278 length = MIN (64, gsf_input_remaining (input));
/* Text payload: step through it one byte (ascii) or two bytes (unicode)
 * at a time and print one character per step.
 * NOTE(review): the read below stores advance bytes at the address of ug;
 * confirm ug is at least two bytes wide for the unicode case. */
282 if (ascii || unicode) {
283 int advance = ascii ? 1 : 2;
284 /* quick and dirty for now */
285 for (i = 0 ; i < length; i += advance) {
287 err |= !gsf_input_read (input, advance, &ug);
288 fprintf (stderr, "%c", byte_to_char (ug));
290 fprintf (stderr, "' - '%s", ascii ? "Ascii" : "Unicode");
/* Offset payload: exactly four bytes, read as a 32-bit little-endian
 * value and printed in hex.
 * NOTE(review): offset is declared gboolean above but is reused here to
 * hold the 32-bit value; a separate variable would be clearer. */
294 g_assert (length == 4);
295 err |= !gsf_input_read (input, 4, data);
296 offset = GSF_LE_GET_GUINT32 (data);
297 fprintf (stderr, "0x%.8x' - 'Offset", offset);
/* Any other payload: hex dump byte by byte while collecting a printable
 * rendering in chars, which is printed after the hex and then freed. */
299 GString *chars = g_string_new ("");
301 for (i = 0 ; i < length; i++) {
303 err |= !gsf_input_read (input, 1, &ug);
304 fprintf (stderr, "%.2x ", ug);
305 g_string_append_printf (chars, "%c", byte_to_char (ug));
307 fprintf (stderr, "' - '%s", chars->str);
308 g_string_free (chars, TRUE);
310 fprintf (stderr, "'\n");
/*
 * Command-line entry point.
 *
 * argc, argv: program arguments. The usage text printed below lists two
 * forms: an optional -c flag followed by an input and an output file, and a
 * -d flag followed by an input file only.
 *
 * Both do_compress and do_decompress are called from here; which one runs is
 * presumably decided by the compress flag (TODO confirm).
 */
317 main (int argc, char *argv[])
320 char const *src = NULL;
321 char const *dest = NULL;
324 GError *error = NULL;
327 gboolean dir = FALSE;
328 gboolean compress = FALSE;
/* Arguments that begin with a dash are options, selected by the character
 * that follows the dash. Unrecognised ones are reported. */
332 for (i = 1; i < argc; i++) {
333 if (argv[i][0] == '-') {
334 switch (argv[i][1]) {
342 fprintf (stderr, "Unknown option '%s'\n", argv[i]);
/* An input file is always required; an output file is required unless
 * dir mode was selected. */
352 if (!src || (!dir && !dest)) {
353 fprintf (stderr, "%s: [-c(ompress)] <infile> <outfile>\n", argv[0]);
354 fprintf (stderr, "%s: [-d(ecode dir)] <infile>\n", argv[0]);
/* Open the files through the stdio-backed gsf wrappers; any failure detail
 * is reported from error when it was set. */
358 input = gsf_input_stdio_new (src, &error);
362 output = gsf_output_stdio_new (dest, &error);
363 if (!input || !output) {
364 fprintf (stderr, "Failed to open input(%p)/output(%p): '%s'\n",
365 input, output, error ? error->message : "<NoMsg>");
370 do_compress (input, output);
372 do_decompress (input, output);
/* Drop the references taken when the streams were opened. */
374 g_object_unref (output);
377 g_object_unref (input);