1 /* unlzw.c -- decompress files in LZW format.
2 * The code in this file is directly derived from the public domain 'compress'
3 * written by Spencer Thomas, Joe Orost, James Woods, Jim McKie, Steve Davies,
4 * Ken Turkowski, Dave Mack and Peter Jannesen.
6 * This is a temporary version which will be rewritten in some future version
7 * to accommodate in-memory decompression.
9 * Tue Dec 12 17:54:07 CET 2006 - werner@suse.de
10 * Be able to emulate a zlib-like behaviour: open, read, and close .Z files
11 * in memory. I'm using context switching and a globally allocated structure
12 * to be able to read while the main loop in unlzw() does its work. For this,
13 * nearly _all_ variables affected by the context switch are moved into this
14 * structure, even the stack and the context type itself.
15 * The original source was adopted from the gzip version 1.3.7.
/* Buffer geometry used by dopenlzw() for its allocations and by unlzw(). */
/* Element count of the uint16_t d_buf allocation; de_stack is the address of its last element. */
26 #define DIST_BUFSIZE 0x2000
/* Byte size of the inbuf allocation and upper bound of the read() fill loops. */
27 #define INBUFSIZ 0x2000
/* unlzw() tests insize against this value before it issues its read() refill. */
29 #define INBUF_EXTRA 0x40
/* Fill level of outbuf at which unlzw() passes the buffer to write_buf(). */
30 #define OUTBUFSIZ 0x2000
/* Extra bytes allocated for outbuf on top of OUTBUFSIZ. */
31 #define OUTBUF_EXTRA 0x800
/* Byte size of the private stack assigned to the unlzw() context (uc[1]). */
32 #define STACK_SIZE 0x1000
/* Forward declaration: dopenlzw() hands unlzw() to makecontext() before its definition. */
36 static void unlzw(LZW_t *in);
38 /* ===========================================================================
39 * Fill the input buffer. This is called only when the buffer is empty.
40 * Adopted from gzip version 1.3.7 util.c
/*
 * fill_inbuf(): refill in->inbuf from the descriptor in->ifd.
 * Visible behaviour: read() stores at inbuf + insize with a limit of
 * INBUFSIZ - insize, the enclosing loop repeats while insize < INBUFSIZ,
 * bytes_in is advanced by insize, and the byte at inbuf[0] is returned.
 * get_byte() calls this when inptr is no longer below insize.
 * NOTE(review): the listing skips lines here (48 -> 58 -> 63), so the
 * handling of a zero/negative read() result and the update of insize and
 * inptr are not visible and must be confirmed against the full source.
 */
43 static __inline__ int fill_inbuf(LZW_t *in)
45 /* Read as much as possible */
48 ssize_t len = read(in->ifd, in->inbuf + in->insize, INBUFSIZ - in->insize);
58 } while (in->insize < INBUFSIZ);
63 in->bytes_in += (off_t)in->insize;
65 return (in->inbuf)[0];
68 /* ===========================================================================
69 * Does the same as write(), but also handles partial pipe writes and checks
71 * Adopted from gzip version 1.3.7 util.c
72 * Note that this version uses context switching, switch back to old context.
/*
 * write_buf(): pass decoded bytes from buf to the reader side.
 * tsize becomes the smaller of cnt and in->tcount; when it is non-zero,
 * that many bytes are copied to in->transfer. swapcontext() then saves the
 * current (decoder) context in uc[1] and resumes uc[0].
 * NOTE(review): lines between 79 and 83 are missing from this listing, so
 * how buf/cnt/tcount are advanced and whether the copy is repeated for the
 * remaining bytes cannot be confirmed from here.
 */
75 static __inline__ void write_buf(LZW_t *in, const unsigned char* buf, size_t cnt)
78 if ((in->tsize = (in->tcount > cnt) ? cnt : in->tcount)) {
79 (void)memcpy(in->transfer, buf, in->tsize);
83 swapcontext(&in->uc[1], &in->uc[0]);
/*
 * get_byte(in): next input byte from inbuf while inptr < insize, otherwise
 * the result of fill_inbuf(in).
 * NOTE(review): the argument is expanded several times; pass only a plain
 * pointer expression without side effects.
 */
87 #define get_byte(in) ((in)->inptr < (in)->insize ? (in)->inbuf[(in)->inptr++] : fill_inbuf((in)))
/* memzero(s,n): set n bytes at s to zero. */
88 #define memzero(s,n) memset ((void*)(s), 0, (n))
/* MAXCODE(n): 1L shifted left by n, i.e. the number of codes of width n bits. */
90 #define MAXCODE(n) (1L << (n))
/*
 * BYTEORDER is tested below to pick the input() variant. The surrounding
 * conditional lines are not part of this listing.
 */
93 # define BYTEORDER 0000
104 #if BYTEORDER == 4321
110 #if BYTEORDER == 1234
/*
 * input(b,o,c,n,m): extract a code from byte buffer b at bit offset o into c.
 * Variant 1 (BYTEORDER == 4321 && NOALLIGN == 1) loads a uint32_t directly
 * from &b[o>>3]; variant 2 assembles p[0], p[1], p[2] with p[0] as the least
 * significant byte. Both shift right by (o & 7) and mask with m.
 * NOTE(review): the closing lines of both variants and the #else/#endif are
 * not visible, so any update of o and the use of n cannot be confirmed here.
 * NOTE(review): variant 1 dereferences a cast byte pointer, which presumes the
 * target tolerates unaligned uint32_t loads -- confirm for new platforms.
 */
123 #if BYTEORDER == 4321 && NOALLIGN == 1
124 # define input(b,o,c,n,m){ \
125 (c) = (*(uint32_t *)(&(b)[(o)>>3])>>((o)&0x7))&(m); \
129 # define input(b,o,c,n,m){ \
130 uint8_t *p = &(b)[(o)>>3]; \
131 (c) = ((((long)(p[0]))|((long)(p[1])<<8)| \
132 ((long)(p[2])<<16))>>((o)&0x7))&(m); \
/*
 * Table accessors. tab_prefixof(), tab_suffixof() and de_stack expand to
 * expressions on a variable named `in` that must exist at the point of use.
 * de_stack is the address of the last d_buf element viewed as a byte pointer;
 * unlzw() starts its downward-growing byte stack there.
 * clear_tab_prefixof(in) zeroes (1<<BITS) unsigned shorts of tab_prefix.
 * NOTE(review): clear_tab_prefixof carries its own trailing ';' and the index
 * argument i is not parenthesised in the accessors.
 */
137 #define tab_prefixof(i) in->tab_prefix[i]
138 #define clear_tab_prefixof(in) memzero((in)->tab_prefix, sizeof(unsigned short)*(1<<(BITS)));
139 #define de_stack ((uint8_t *)(&in->d_buf[DIST_BUFSIZE-1]))
140 #define tab_suffixof(i) in->tab_suffix[i]
142 /* ============================================================================
143 * Decompress in to out. This routine adapts to the codes in the
144 * file building the "string" table on-the-fly; requiring no table to
145 * be stored in the compressed file.
146 * IN assertions: the buffer inbuf already contains the beginning of
147 * the compressed data, from offsets inptr to insize-1 inclusive.
148 * The magic header has already been checked and skipped.
149 * bytes_in and bytes_out have been initialized.
151 * Adopted from gzip version 1.3.7 unlzw.c
152 * This is mainly the head of the old unlzw() before its main loop.
/*
 * openlzw(): open `path` with O_RDONLY and wrap the resulting descriptor
 * with dopenlzw(), forwarding `mode`.
 * Only a non-NULL mode whose first character is 'r' passes the mode check.
 * NOTE(review): the statements executed when the mode check or open() fails
 * are on lines this listing omits (158, 160); confirm the error return there.
 */
154 LZW_t *openlzw(const char * path, const char *mode)
157 if (!mode || *mode != 'r')
159 if ((fildes = open(path, O_RDONLY)) < 0)
161 return dopenlzw(fildes, mode);
/*
 * dopenlzw(): build the LZW_t reader state on top of descriptor `fildes`.
 * Visible steps:
 *   - allocate and zero the LZW_t, then allocate inbuf, outbuf, d_buf,
 *     tab_suffix, tab_prefix, the context stack and two ucontext_t slots;
 *   - initialise uc[1] so that it runs unlzw(in) on the private stack with
 *     uc[0] as its uc_link;
 *   - read the two magic bytes and compare them with LZW_MAGIC, then read the
 *     flags byte that carries maxbits and the block-mode bit;
 *   - set up code width, masks, bit position, first free code and the first
 *     256 suffix table entries.
 * NOTE(review): the branch taken after each failing check, the declaration of
 * `magic`, and the return statement are on lines this listing omits.
 */
166 LZW_t *dopenlzw(int fildes, const char *mode)
168 LZW_t *in = (LZW_t*)0;
170 sigset_t sigmask, oldmask;
172 if (!mode || *mode != 'r')
175 if ((in = (LZW_t*)malloc(sizeof(LZW_t))) == (LZW_t*)0)
177 memset(in, 0, sizeof(LZW_t));
179 if ((in->inbuf = (uint8_t*)malloc(sizeof(uint8_t)*(INBUFSIZ))) == (uint8_t*)0)
181 if ((in->outbuf = (uint8_t*)malloc(sizeof(uint8_t)*(OUTBUFSIZ+OUTBUF_EXTRA))) == (uint8_t*)0)
183 if ((in->d_buf = (uint16_t*)malloc(sizeof(uint16_t)*DIST_BUFSIZE)) == (uint16_t*)0)
185 if ((in->tab_suffix = (uint8_t*) malloc(sizeof(uint8_t )*(2L*WSIZE))) == (uint8_t*)0)
187 if ((in->tab_prefix = (uint16_t*)malloc(sizeof(uint16_t)*(1<<(BITS)))) == (uint16_t*)0)
189 if ((in->ifd = fildes) < 0)
192 if ((in->stack = (uint8_t*)malloc(STACK_SIZE)) == (uint8_t*)0)
194 if ((in->uc = (ucontext_t*)malloc(2*sizeof(ucontext_t))) == (ucontext_t*)0)
/* uc[1] is the decoder context: own stack, falls back to uc[0] via uc_link. */
196 if (getcontext(&in->uc[1]) < 0)
198 in->uc[1].uc_link = &in->uc[0];
199 in->uc[1].uc_stack.ss_sp = in->stack;
200 in->uc[1].uc_stack.ss_size = STACK_SIZE;
/*
 * sigmask is filled by sigucmask() (defined outside this listing; presumably
 * it returns the current mask when no new set is given), extended by SIGINT
 * and SIGQUIT, and stored as the signal mask of the decoder context.
 */
201 if (sigucmask(SIG_SETMASK, (sigset_t*)0, &sigmask) < 0)
203 if (sigaddset(&sigmask, SIGINT) < 0)
205 if (sigaddset(&sigmask, SIGQUIT) < 0)
207 #if defined(__ia64__) && defined(uc_sigmask) /* On ia64 the type of uc_sigmask is ulong not sigset_t */
208 in->uc[1].uc_sigmask = sig_ia64_mask(sigmask);
210 in->uc[1].uc_sigmask = sigmask;
212 makecontext(&in->uc[1], (void(*)(void))unlzw, 1, in);
/* The two magic bytes are read with sigmask installed; oldmask is put back afterwards. */
214 sigucmask(SIG_SETMASK, &sigmask, &oldmask);
215 magic[0] = get_byte(in);
216 magic[1] = get_byte(in);
217 sigucmask(SIG_SETMASK, &oldmask, &sigmask);
219 if (memcmp(magic, LZW_MAGIC, sizeof(magic)))
222 in->n.block_mode = BLOCK_MODE; /* block compress mode -C compatible with 2.0 */
223 in->rsize = in->insize;
/* Third header byte: low bits (BIT_MASK) give maxbits, BLOCK_MODE bit selects block mode. */
225 in->n.maxbits = get_byte(in);
226 in->n.block_mode = in->n.maxbits & BLOCK_MODE;
228 if ((in->n.maxbits & LZW_RESERVED) != 0) {
229 fprintf(stderr, "%s: warning, unknown flags 0x%x\n",
230 __FUNCTION__, in->n.maxbits & LZW_RESERVED);
232 in->n.maxbits &= BIT_MASK;
233 in->n.maxmaxcode = MAXCODE(in->n.maxbits);
235 if (in->n.maxbits > BITS) {
236 fprintf(stderr, "%s: compressed with %d bits, can only handle %d bits\n",
237 __FUNCTION__, in->n.maxbits, BITS);
/* Decoding starts with INIT_BITS wide codes. */
241 in->n.maxcode = MAXCODE(in->n.n_bits = INIT_BITS)-1;
242 in->n.bitmask = (1<<in->n.n_bits)-1;
/* posbits is the bit offset of the next unread byte of inbuf. */
245 in->n.posbits = in->inptr<<3;
/* First code available for new table entries: FIRST in block mode, 256 otherwise. */
247 in->n.free_ent = ((in->n.block_mode) ? FIRST : 256);
249 clear_tab_prefixof(in); /* Initialize the first 256 entries in the table. */
/* Counts down to 0; the loop condition relies on n.code being a signed type. */
251 for (in->n.code = 255 ; in->n.code >= 0 ; --in->n.code) {
252 tab_suffixof(in->n.code) = (uint8_t)in->n.code;
262 * New function, simply to free all allocated objects in
263 * reverse order and close the input file.
/*
 * closelzw(): release the resources set up by dopenlzw() in reverse order of
 * their setup: the ucontext array, the context stack, the input descriptor
 * (closed when >= 0), then tab_prefix, tab_suffix, d_buf, outbuf and inbuf.
 * uc and stack may already be NULL here because readlzw() clears them.
 * NOTE(review): the lines before 269 and after 276 are not in this listing,
 * so the handling of a NULL `in` and the release of the LZW_t itself cannot
 * be confirmed from here.
 * NOTE(review): the pointer guards are redundant for free(), which accepts
 * NULL, but they are harmless.
 */
265 void closelzw(LZW_t * in)
269 if (in->uc) free(in->uc);
270 if (in->stack) free(in->stack);
271 if (in->ifd >= 0) close(in->ifd);
272 if (in->tab_prefix) free(in->tab_prefix);
273 if (in->tab_suffix) free(in->tab_suffix);
274 if (in->d_buf) free(in->d_buf);
275 if (in->outbuf) free(in->outbuf);
276 if (in->inbuf) free(in->inbuf);
282 * Adopted from gzip version 1.3.7 unlzw.c
283 * This is mainly the body of the old unlzw() which is its main loop.
/*
 * unlzw(): decoder loop, started through makecontext() as the uc[1] context.
 * Each outer pass (repeated while rsize != 0) moves the unconsumed input to
 * the front of inbuf, tops the buffer up with read(), and decodes codes while
 * posbits is below inbits. Decoded bytes are collected in outbuf and handed to
 * write_buf() whenever outpos reaches OUTBUFSIZ, and once more after the loop
 * if anything is left.
 * NOTE(review): this listing skips many lines (braces, else branches, local
 * declarations, error exits), so the comments below describe only the
 * statements that are visible.
 */
285 static void unlzw(LZW_t *in)
/* o = byte offset of the next code, e = bytes not yet consumed; shift them to the front. */
293 e = in->insize - (o = (in->n.posbits>>3));
295 for (i = 0 ; i < e ; ++i) {
296 in->inbuf[i] = in->inbuf[i+o];
301 if (in->insize < INBUF_EXTRA) {
303 in->rsize = read(in->ifd, in->inbuf + in->insize, INBUFSIZ - in->insize);
307 perror(__FUNCTION__);
312 in->insize += in->rsize;
313 } while (in->insize < INBUFSIZ);
314 in->bytes_in += (off_t)in->insize;
/*
 * inbits = number of input bits that may be decoded in this pass. While more
 * input is expected (rsize != 0) insize is first reduced by insize % n_bits.
 */
316 in->n.inbits = ((in->rsize != 0) ? ((long)in->insize - in->insize%in->n.n_bits)<<3 :
317 ((long)in->insize<<3)-(in->n.n_bits-1));
319 while (in->n.inbits > in->n.posbits) {
/*
 * Table is full for the current width: posbits is rounded up to a multiple of
 * n_bits*8 bits before maxcode and bitmask are recomputed.
 */
320 if (in->n.free_ent > in->n.maxcode) {
321 in->n.posbits = ((in->n.posbits-1) +
322 ((in->n.n_bits<<3)-(in->n.posbits-1+(in->n.n_bits<<3))%(in->n.n_bits<<3)));
324 if (in->n.n_bits == in->n.maxbits) {
325 in->n.maxcode = in->n.maxmaxcode;
327 in->n.maxcode = MAXCODE(in->n.n_bits)-1;
329 in->n.bitmask = (1<<in->n.n_bits)-1;
/* Fetch the next code at bit offset posbits. */
332 input(in->inbuf,in->n.posbits,in->n.code,in->n.n_bits,in->n.bitmask);
/*
 * First code of the stream (oldcode still -1): it is emitted as a literal.
 * NOTE(review): in the visible lines a code >= 256 only triggers the message;
 * the store on the following line is still executed -- confirm intended.
 */
334 if (in->n.oldcode == -1) {
335 if (256 <= in->n.code)
336 fprintf(stderr, "%s: corrupt input.\n", __FUNCTION__);
337 in->outbuf[in->outpos++] = (uint8_t)(in->n.finchar = (int)(in->n.oldcode=in->n.code));
/* CLEAR in block mode: reset the prefix table, free_ent and the code width. */
340 if (in->n.code == CLEAR && in->n.block_mode) {
341 clear_tab_prefixof(in);
342 in->n.free_ent = FIRST - 1;
343 in->n.posbits = ((in->n.posbits-1) +
344 ((in->n.n_bits<<3)-(in->n.posbits-1+(in->n.n_bits<<3))%(in->n.n_bits<<3)));
345 in->n.maxcode = MAXCODE(in->n.n_bits = INIT_BITS)-1;
346 in->n.bitmask = (1<<in->n.n_bits)-1;
/* incode keeps the code as read; stackp starts at the top of the byte stack. */
349 in->n.incode = in->n.code;
350 in->n.stackp = de_stack;
352 if (in->n.code >= in->n.free_ent) { /* Special case for KwKwK string. */
/* A code beyond free_ent cannot be in the table: dump diagnostics, flush, report. */
353 if (in->n.code > in->n.free_ent) {
/*
 * NOTE(review): p[-1] below reads the byte before inbuf when posbits>>3 is 0.
 * The lines guarding this dump are not visible (it may be conditional).
 */
356 in->n.posbits -= in->n.n_bits;
357 p = &in->inbuf[in->n.posbits>>3];
359 "code:%ld free_ent:%ld n_bits:%d insize:%lu\n",
360 in->n.code, in->n.free_ent, in->n.n_bits, in->insize);
362 "posbits:%ld inbuf:%02X %02X %02X %02X %02X\n",
363 in->n.posbits, p[-1],p[0],p[1],p[2],p[3]);
365 if (in->outpos > 0) {
366 write_buf(in, in->outbuf, in->outpos);
367 in->bytes_out += (off_t)in->outpos;
370 fprintf(stderr, "%s: corrupt input.\n", __FUNCTION__);
372 *--in->n.stackp = (uint8_t)in->n.finchar;
373 in->n.code = in->n.oldcode;
376 /* Generate output characters in reverse order */
377 while ((uint32_t)in->n.code >= (uint32_t)256) {
378 *--in->n.stackp = tab_suffixof(in->n.code);
379 in->n.code = tab_prefixof(in->n.code);
381 *--in->n.stackp = (uint8_t)(in->n.finchar = tab_suffixof(in->n.code));
383 /* And put them out in forward order */
/*
 * newdif = bytes currently on the stack. If they would reach OUTBUFSIZ they
 * are copied in pieces, with outbuf passed to write_buf() each time it fills.
 */
384 if (in->outpos + (in->n.newdif = (de_stack - in->n.stackp)) >= OUTBUFSIZ) {
386 if (in->n.newdif > OUTBUFSIZ - in->outpos)
387 in->n.newdif = OUTBUFSIZ - in->outpos;
389 if (in->n.newdif > 0) {
390 memcpy(in->outbuf + in->outpos, in->n.stackp, in->n.newdif);
391 in->outpos += in->n.newdif;
393 if (in->outpos >= OUTBUFSIZ) {
394 write_buf(in, in->outbuf, in->outpos);
395 in->bytes_out += (off_t)in->outpos;
398 in->n.stackp+= in->n.newdif;
399 } while ((in->n.newdif = (de_stack - in->n.stackp)) > 0);
401 memcpy(in->outbuf + in->outpos, in->n.stackp, in->n.newdif);
402 in->outpos += in->n.newdif;
/* New entry = previous code as prefix + first byte of the string just emitted. */
405 if ((in->n.code = in->n.free_ent) < in->n.maxmaxcode) { /* Generate the new entry. */
407 tab_prefixof(in->n.code) = (uint16_t)in->n.oldcode;
408 tab_suffixof(in->n.code) = (uint8_t)in->n.finchar;
409 in->n.free_ent = in->n.code+1;
411 in->n.oldcode = in->n.incode; /* Remember previous code. */
413 } while (in->rsize != 0);
/* Flush whatever is left in outbuf once the input is exhausted. */
415 if (in->outpos > 0) {
416 write_buf(in, in->outbuf, in->outpos);
417 in->bytes_out += (off_t)in->outpos;
424 * New function, simply to read from the output buffer of unlzw().
425 * We do this by switching into the context of unlzw() and back
426 * to our old context if the provided buffer is filled.
/*
 * readlzw(): fill `buffer` with decompressed data by resuming unlzw().
 * buffer is published as in->transfer; swapcontext() then saves the caller's
 * context in uc[0] and switches to the decoder context uc[1]. When in->uc is
 * already NULL the call returns 0 without switching. The visible tail frees
 * the ucontext array and clears in->uc and in->stack, so later calls take the
 * early return.
 * NOTE(review): lines 431-432, 436, 439 and everything after 440 are not in
 * this listing, so the setup of tcount/tsize from `size`, the condition that
 * guards the teardown, the release of the stack memory, and the value
 * returned on the normal path cannot be confirmed from here.
 */
428 ssize_t readlzw(LZW_t * in, char* buffer, const size_t size)
430 in->transfer = (uint8_t*)buffer;
433 if (in->uc == (ucontext_t*)0)
434 return 0; /* For (f)lex scanner ... */
435 swapcontext(&in->uc[0], &in->uc[1]);
437 free(in->uc); /* ... do not enter next */
438 in->uc = (ucontext_t*)0;
440 in->stack = (uint8_t*)0;
452 LZW_t *lzw = openlzw("man.1.Z", "r");
457 len = readlzw(lzw, &buffer[0], sizeof(buffer));
458 write(1, &buffer[0], len);