1 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
3 * gsf-input-gzip.c: wrapper to uncompress gzipped input
5 * Copyright (C) 2002-2006 Jody Goldberg (jody@gnome.org)
6 * Copyright (C) 2005-2006 Morten Welinder (terra@gnome.org)
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of version 2.1 of the GNU Lesser General Public
10 * License as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23 #include <gsf-config.h>
24 #include <gsf/gsf-input-gzip.h>
25 #include <gsf/gsf-input-impl.h>
26 #include <gsf/gsf-impl-utils.h>
27 #include <gsf/gsf-utils.h>
/* NOTE(review): no use of Z_BUFSIZE is visible in this listing; presumably it
 * caps how many compressed bytes gsf_input_gzip_read fetches from the source
 * per refill -- confirm against the full file. */
33 #define Z_BUFSIZE 0x100
/* Parent class, cached by gsf_input_gzip_class_init; used to chain up the
 * constructor and finalize. */
35 static GObjectClass *parent_class;
/*
 * Instance state of an input that inflates gzip data read from another input.
 * NOTE(review): the functions in this file also use members named stream,
 * err, buf and buf_size; their declarations are not visible in this listing.
 */
37 struct _GsfInputGZip {
40 GsfInput *source; /* compressed data */
41 gboolean raw; /* No header and no trailer. */
/* Size of the data once inflated.  gsf_input_gzip_init sets it to -1;
 * check_header reads it from the last 4 bytes of the source while it is
 * still negative. */
43 gsf_off_t uncompressed_size;
/* Set by gsf_input_gzip_read once it has fed zlib its one extra dummy byte. */
44 gboolean stop_byte_added;
/* Compressed bytes handed to zlib as stream.next_in. */
47 guint8 const *gzipped_data;
48 uLong crc; /* crc32 of uncompressed data */
/* header_size: offset in source that a backward seek rewinds to before
 * inflating again.  trailer_size: number of bytes at the end of source that
 * gsf_input_gzip_read never feeds to zlib. */
53 gsf_off_t header_size, trailer_size;
/* Running sum of seek target positions; only used to decide when to warn
 * that seeking is slow. */
54 gsf_off_t seek_skipped;
/* NOTE(review): the declaration enclosing this member is not visible in this
 * listing; presumably it is the class struct for this type, embedding the
 * parent GsfInputClass -- confirm. */
58 GsfInputClass input_class;
/* Property id used for "uncompressed-size" in gsf_input_gzip_class_init and
 * in the get/set property handlers.
 * NOTE(review): the enclosing enum and the other property ids are not
 * visible in this listing. */
65 PROP_UNCOMPRESSED_SIZE
/* Bits of the gzip header flag byte.  check_header tests the EXTRA_FIELD,
 * ORIGINAL_NAME, HAS_COMMENT and HEADER_CRC bits to decide which optional
 * header fields to skip. */
69 #define GZIP_IS_ASCII 0x01 /* file contains text ? */
70 #define GZIP_HEADER_CRC 0x02 /* there is a CRC in the header */
71 #define GZIP_EXTRA_FIELD 0x04 /* there is an 'extra' field */
72 #define GZIP_ORIGINAL_NAME 0x08 /* the original is stored */
73 #define GZIP_HAS_COMMENT 0x10 /* There is a comment in the header */
/* Union of all flag bits defined above; check_header tests for any bit
 * outside this mask. */
74 #define GZIP_HEADER_FLAGS (unsigned)(GZIP_IS_ASCII |GZIP_HEADER_CRC |GZIP_EXTRA_FIELD |GZIP_ORIGINAL_NAME |GZIP_HAS_COMMENT)
/*
 * check_header:
 * @input: the gzip wrapper whose source is examined.
 *
 * Parses the gzip header found in input->source, skips its optional fields,
 * records header_size and trailer_size, determines the uncompressed size when
 * it is not already known, and publishes that size as the size of @input.
 *
 * Returns TRUE when the source has fewer bytes left than the trailer needs
 * (see the last test).  init_zip treats any non-FALSE result as
 * "Invalid gzip header".
 *
 * NOTE(review): this listing omits several lines of the function (braces,
 * local declarations, and the statements that follow most of the tests
 * below), so the comments describe only what is visible.
 */
77 check_header (GsfInputGZip *input)
/* NOTE(review): presumably this zeroing is the raw-stream branch (no header,
 * no trailer); the guarding condition is not visible here. */
80 input->header_size = 0;
81 input->trailer_size = 0;
/* The source has to start with these two bytes. */
83 static guint8 const signature[2] = {0x1f, 0x8b};
/* Read the 10-byte fixed part of the header (2 + 1 + 1 + 6) and compare its
 * first two bytes with the signature. */
88 if (NULL == (data = gsf_input_read (input->source, 2 + 1 + 1 + 6, NULL)) ||
89 0 != memcmp (data, signature, sizeof (signature)))
92 /* verify flags and compression type */
/* Byte 2 must name the deflate method and no flag bit outside
 * GZIP_HEADER_FLAGS may be set. */
94 if (data[2] != Z_DEFLATED || (flags & ~GZIP_HEADER_FLAGS) != 0)
97 /* If we have the size, don't bother seeking to the end. */
98 if (input->uncompressed_size < 0) {
99 /* Get the uncompressed size */
/* The last 4 bytes of the source are read as a little-endian 32-bit value. */
100 if (gsf_input_seek (input->source, (gsf_off_t) -4, G_SEEK_END) ||
101 NULL == (data = gsf_input_read (input->source, 4, NULL)))
103 /* FIXME, but how? The size read here is modulo 2^32. */
104 input->uncompressed_size = GSF_LE_GET_GUINT32 (data);
/* Plausibility check: a claimed size more than 1000 times the size of the
 * source produces a warning. */
106 if (input->uncompressed_size / 1000 > gsf_input_size (input->source)) {
107 g_warning ("Suspiciously well compressed file with better than 1000:1 ratio.\n"
108 "It is probably truncated or corrupt");
/* Position the source just past the fixed 10-byte header before skipping the
 * optional fields. */
112 if (gsf_input_seek (input->source, 2 + 1 + 1 + 6, G_SEEK_SET))
/* Extra field: a 2-byte little-endian length followed by that many bytes,
 * which are read and discarded. */
115 if (flags & GZIP_EXTRA_FIELD) {
116 if (NULL == (data = gsf_input_read (input->source, 2, NULL)))
118 len = GSF_LE_GET_GUINT16 (data);
119 if (NULL == gsf_input_read (input->source, len, NULL))
122 if (flags & GZIP_ORIGINAL_NAME) {
123 /* Skip over the filename (which is in ISO 8859-1 encoding). */
/* One byte is read at a time until a 0 byte has been consumed. */
125 if (NULL == (data = gsf_input_read (input->source, 1, NULL)))
127 } while (*data != 0);
130 if (flags & GZIP_HAS_COMMENT) {
131 /* Skip over the comment (which is in ISO 8859-1 encoding). */
/* Same byte-at-a-time scan for the terminating 0 byte. */
133 if (NULL == (data = gsf_input_read (input->source, 1, NULL)))
135 } while (*data != 0);
/* When a header CRC is flagged, its two bytes are read; no comparison of
 * that value is visible in this listing. */
138 if (flags & GZIP_HEADER_CRC &&
139 NULL == (data = gsf_input_read (input->source, 2, NULL)))
/* The header ends at the source's current offset. */
142 input->header_size = input->source->cur_offset;
143 /* the last 8 bytes are the crc and size. */
144 input->trailer_size = 8;
/* Publish the uncompressed size as the size of this input. */
147 gsf_input_set_size (GSF_INPUT (input), input->uncompressed_size);
/* What is left in the source must at least hold the trailer. */
149 if (gsf_input_remaining (input->source) < input->trailer_size)
150 return TRUE; /* No room for payload */
/*
 * init_zip:
 * @gzip: the object being set up.
 * @err: receives a newly created GError describing the failure.
 *
 * Prepares zlib for inflating, rewinds the source to its start and runs
 * check_header on it.  If the header is rejected, the source is moved back
 * to the position it had on entry.
 *
 * gsf_input_gzip_constructor treats a result other than FALSE as failure.
 * NOTE(review): the return statements and any NULL guards around the
 * writes to *err are not visible in this listing.
 */
156 init_zip (GsfInputGZip *gzip, GError **err)
/* Negative window bits ask zlib for raw inflate, i.e. no zlib or gzip
 * wrapper handling inside zlib (see the zlib manual for inflateInit2). */
160 if (Z_OK != inflateInit2 (&(gzip->stream), -MAX_WBITS)) {
162 *err = g_error_new (gsf_input_error_id (), 0,
163 "Unable to initialize zlib");
/* Remember the current source position so it can be restored below. */
167 cur_pos = gsf_input_tell (gzip->source);
168 if (gsf_input_seek (gzip->source, 0, G_SEEK_SET)) {
170 *err = g_error_new (gsf_input_error_id (), 0,
171 "Failed to rewind source");
175 if (check_header (gzip) != FALSE) {
177 *err = g_error_new (gsf_input_error_id (), 0,
178 "Invalid gzip header");
/* Put the source back where it was on entry; a failure to do so is only
 * warned about. */
179 if (gsf_input_seek (gzip->source, cur_pos, G_SEEK_SET)) {
180 g_warning ("attempt to restore position failed ??");
189 * gsf_input_gzip_new :
190 * @source : The underlying data source.
191 * @err : optionally %NULL.
193 * Adds a reference to @source.
195 * Returns: a new file or %NULL.
/* Public constructor: builds the object with g_object_new, reports a stored
 * construction error to the caller, and names the result after its source.
 * NOTE(review): parts of the g_object_new call and the condition guarding
 * the error branch are not visible in this listing. */
198 gsf_input_gzip_new (GsfInput *source, GError **err)
202 g_return_val_if_fail (GSF_IS_INPUT (source), NULL);
204 gzip = g_object_new (GSF_INPUT_GZIP_TYPE,
207 if (G_UNLIKELY (NULL == gzip)) return NULL;
/* The constructor records failures in gzip->err; the caller gets a copy and
 * the unusable object is released. */
211 *err = g_error_copy (gzip->err);
212 g_object_unref (gzip);
/* The wrapper is given the same name as the source it wraps. */
215 gsf_input_set_name (GSF_INPUT (gzip), gsf_input_name (source));
217 return GSF_INPUT (gzip);
/* GObject finalize handler: drops the reference on the source, ends the zlib
 * stream, clears the stored error and chains up to the parent class.
 * NOTE(review): some lines between the source release and the zlib teardown
 * are missing from this listing. */
221 gsf_input_gzip_finalize (GObject *obj)
223 GsfInputGZip *gzip = (GsfInputGZip *)obj;
225 if (gzip->source != NULL) {
226 g_object_unref (G_OBJECT (gzip->source));
/* inflateEnd is only called when zlib has internal state attached to the
 * stream. */
232 if (gzip->stream.state != NULL)
233 inflateEnd (&(gzip->stream));
235 g_clear_error (&gzip->err);
/* Chain up last, after this class's own resources are released. */
237 parent_class->finalize (obj);
/* Dup handler (installed as input_class->Dup): duplicates the underlying
 * source and builds a new gzip input on top of that copy.
 * NOTE(review): several conditions and some g_object_new arguments are not
 * visible in this listing, so the branch structure below is only partly
 * documented. */
241 gsf_input_gzip_dup (GsfInput *src_input, GError **err)
243 GsfInputGZip const *src = (GsfInputGZip *)src_input;
245 GsfInput *src_source_copy;
/* Duplicate the compressed source first; the alternative branch leaves the
 * copy as NULL. */
248 src_source_copy = gsf_input_dup (src->source, err);
252 src_source_copy = NULL;
254 dst = g_object_new (GSF_INPUT_GZIP_TYPE,
255 "source", src_source_copy,
/* Setting the "source" property takes its own reference
 * (gsf_input_gzip_set_source), so the local one is dropped here. */
260 g_object_unref (src_source_copy);
262 if (G_UNLIKELY (NULL == dst))
/* Carry the original's stored error over to the duplicate. */
266 g_clear_error (&dst->err);
267 dst->err = g_error_copy (src->err);
/* Otherwise, if constructing the duplicate itself recorded an error, hand a
 * copy of it to the caller and discard the duplicate. */
268 } else if (dst->err) {
270 *err = g_error_copy (dst->err);
271 g_object_unref (dst);
275 return GSF_INPUT (dst);
/*
 * Read handler (installed as input_class->Read).
 * @input: the gzip input.
 * @num_bytes: number of uncompressed bytes wanted.
 * @buffer: destination, or NULL to use the object's own scratch buffer.
 *
 * Inflates until @num_bytes bytes have been produced, refilling zlib's input
 * from the source as needed while keeping the trailer bytes away from zlib,
 * and folds the produced bytes into the running CRC.
 *
 * NOTE(review): the return statements, one error message argument and a few
 * other lines are not visible in this listing.
 */
278 static guint8 const *
279 gsf_input_gzip_read (GsfInput *input, size_t num_bytes, guint8 *buffer)
281 GsfInputGZip *gzip = GSF_INPUT_GZIP (input);
/* No caller buffer: make sure the scratch buffer holds at least num_bytes
 * (and never less than 256 bytes). */
283 if (buffer == NULL) {
284 if (gzip->buf_size < num_bytes) {
285 gzip->buf_size = MAX (num_bytes, 256);
287 gzip->buf = g_new (guint8, gzip->buf_size);
/* Tell zlib where to write and how much output is wanted. */
292 gzip->stream.next_out = buffer;
293 gzip->stream.avail_out = num_bytes;
/* Keep inflating until all requested output has been produced. */
294 while (gzip->stream.avail_out != 0) {
/* zlib has used up its input: fetch more from the source. */
296 if (gzip->stream.avail_in == 0) {
297 gsf_off_t remain = gsf_input_remaining (gzip->source);
/* At most the trailer is left in the source. */
298 if (remain <= gzip->trailer_size) {
/* Less than a full trailer is left, or the dummy byte was already supplied
 * once: record an error. */
299 if (remain < gzip->trailer_size || gzip->stop_byte_added) {
300 g_clear_error (&gzip->err);
301 gzip->err = g_error_new
302 (gsf_input_error_id (), 0,
306 /* zlib requires an extra byte. */
/* The single extra byte comes from an empty string literal, i.e. it is 0. */
307 gzip->stream.avail_in = 1;
308 gzip->gzipped_data = "";
309 gzip->stop_byte_added = TRUE;
/* Otherwise read a chunk that stops short of the trailer.
 * NOTE(review): the second MIN() operand is not visible in this listing. */
311 size_t n = MIN (remain - gzip->trailer_size,
315 gsf_input_read (gzip->source, n, NULL);
316 if (!gzip->gzipped_data) {
317 g_clear_error (&gzip->err);
318 gzip->err = g_error_new
319 (gsf_input_error_id (), 0,
320 "Failed to read from source");
323 gzip->stream.avail_in = n;
325 gzip->stream.next_in = (Byte *)gzip->gzipped_data;
327 zerr = inflate (&(gzip->stream), Z_NO_FLUSH);
/* NOTE(review): the enclosing test on zerr and the consequences of the two
 * checks below are not visible in this listing. */
329 if (zerr != Z_STREAM_END)
331 /* Premature end of stream. */
332 if (gzip->stream.avail_out != 0)
/* Fold the bytes written into buffer by this call into the running CRC of
 * the uncompressed data. */
337 gzip->crc = crc32 (gzip->crc, buffer, (uInt)(gzip->stream.next_out - buffer));
/*
 * Seek handler (installed as input_class->Seek).
 * @input: the gzip input.
 * @offset: offset relative to @whence.
 * @whence: G_SEEK_SET, G_SEEK_CUR or G_SEEK_END.
 *
 * Turns the request into an absolute position in the uncompressed data.  A
 * target before the current offset restarts decompression from the
 * beginning of the payload; the target is then reached through
 * gsf_input_seek_emulate.  Returns TRUE for an unknown @whence.
 *
 * NOTE(review): the switch header, the failure returns and the final return
 * are not visible in this listing.
 */
342 gsf_input_gzip_seek (GsfInput *input, gsf_off_t offset, GSeekType whence)
344 GsfInputGZip *gzip = GSF_INPUT_GZIP (input);
345 /* Global flag -- we don't want one per stream. */
346 static gboolean warned = FALSE;
347 gsf_off_t pos = offset;
349 /* Note, that pos has already been sanity checked. */
/* Convert the relative offset into an absolute one. */
351 case G_SEEK_SET : break;
352 case G_SEEK_CUR : pos += input->cur_offset; break;
353 case G_SEEK_END : pos += input->size; break;
354 default : return TRUE;
/* Backward seek: rewind the source to the end of the header, reset the CRC
 * and the zlib state, and restart from uncompressed offset 0. */
357 if (pos < input->cur_offset) {
358 if (gsf_input_seek (gzip->source, gzip->header_size, G_SEEK_SET))
360 gzip->crc = crc32 (0L, Z_NULL, 0);
361 gzip->stream.avail_in = 0;
362 if (inflateReset (&(gzip->stream)) != Z_OK)
364 input->cur_offset = 0;
/* NOTE(review): gsf_input_seek_emulate is defined elsewhere; presumably it
 * reads and discards data until pos is reached -- confirm. */
367 if (gsf_input_seek_emulate (input, pos))
/* Keep a running total of seek targets and warn once the total is large. */
370 gzip->seek_skipped += pos;
372 gzip->seek_skipped != pos && /* Don't warn for single seek. */
373 gzip->seek_skipped >= 1000000) {
375 g_warning ("Seeking in gzipped streams is awfully slow.");
/* Instance initializer: marks the uncompressed size as unknown, prepares the
 * zlib stream fields for a later inflateInit2 call, and starts the CRC and
 * seek statistics from their initial values.
 * NOTE(review): a few lines of this function are missing from this listing. */
382 gsf_input_gzip_init (GObject *obj)
384 GsfInputGZip *gzip = GSF_INPUT_GZIP (obj);
/* Negative means "not known yet"; check_header and the constructor test
 * for that. */
388 gzip->uncompressed_size = -1;
/* Null allocator hooks: per the zlib manual, zlib then uses its default
 * allocation functions. */
390 gzip->stream.zalloc = (alloc_func)0;
391 gzip->stream.zfree = (free_func)0;
392 gzip->stream.opaque = (voidpf)0;
/* No input or output buffers are attached yet. */
393 gzip->stream.next_in = Z_NULL;
394 gzip->stream.next_out = Z_NULL;
395 gzip->stream.avail_in = gzip->stream.avail_out = 0;
/* crc32 with a NULL buffer yields the initial CRC value. */
396 gzip->crc = crc32 (0L, Z_NULL, 0);
399 gzip->seek_skipped = 0;
/* GObject get_property handler: copies the requested field into @value.
 * NOTE(review): the remaining parameters and the case labels of the first
 * two branches are missing from this listing; judging by the fields they
 * read, those branches presumably serve the "raw" and "source" properties. */
403 gsf_input_gzip_get_property (GObject *object,
408 GsfInputGZip *gzip = (GsfInputGZip *)object;
410 switch (property_id) {
412 g_value_set_boolean (value, gzip->raw);
415 g_value_set_object (value, gzip->source);
417 case PROP_UNCOMPRESSED_SIZE:
418 g_value_set_int64 (value, gzip->uncompressed_size);
/* Any other id is reported with the standard GObject warning. */
421 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
/* Replaces gzip->source with @source.  The new source is referenced before
 * the reference on the previous one is dropped.
 * NOTE(review): the lines between these statements (presumably NULL guards)
 * are not visible in this listing. */
427 gsf_input_gzip_set_source (GsfInputGZip *gzip, GsfInput *source)
430 g_object_ref (GSF_INPUT (source));
432 g_object_unref (gzip->source);
433 gzip->source = source;
/* GObject set_property handler: stores the value into the matching field;
 * the source goes through gsf_input_gzip_set_source so that references are
 * managed.
 * NOTE(review): the remaining parameters and the case labels of the first
 * two branches are missing from this listing; judging by the fields they
 * write, those branches presumably serve the "raw" and "source" properties. */
437 gsf_input_gzip_set_property (GObject *object,
442 GsfInputGZip *gzip = (GsfInputGZip *)object;
444 switch (property_id) {
446 gzip->raw = g_value_get_boolean (value);
449 gsf_input_gzip_set_source (gzip, g_value_get_object (value));
451 case PROP_UNCOMPRESSED_SIZE:
452 gzip->uncompressed_size = g_value_get_int64 (value);
/* Any other id is reported with the standard GObject warning. */
455 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
/* GObject constructor: chains up to build the instance, then validates the
 * construct-time state and initializes zlib.  Problems are recorded in
 * gzip->err rather than reported directly; gsf_input_gzip_new and
 * gsf_input_gzip_dup inspect that field afterwards.
 * NOTE(review): the first condition of the if/else chain, its error message
 * and the body of the last branch are not visible in this listing. */
461 gsf_input_gzip_constructor (GType type,
462 guint n_construct_properties,
463 GObjectConstructParam *construct_params)
467 gzip = (GsfInputGZip *)(parent_class->constructor (type,
468 n_construct_properties,
472 g_clear_error (&gzip->err);
473 gzip->err = g_error_new (gsf_input_error_id (), 0,
/* A raw stream has no trailer to read the size from, so the size must have
 * been supplied as a property. */
475 } else if (gzip->raw && gzip->uncompressed_size < 0) {
476 g_clear_error (&gzip->err);
477 gzip->err = g_error_new (gsf_input_error_id (), 0,
478 "Uncompressed size not set");
/* init_zip stores its own error in gzip->err when it fails. */
479 } else if (init_zip (gzip, &gzip->err) != FALSE) {
483 return (GObject *)gzip;
/* Class initializer: installs the GObject and GsfInput virtual functions,
 * registers the "raw", "source" and "uncompressed-size" properties (each
 * carrying G_PARAM_CONSTRUCT_ONLY), and caches the parent class.
 * NOTE(review): several argument lines of the property registrations are
 * missing from this listing. */
487 gsf_input_gzip_class_init (GObjectClass *gobject_class)
489 GsfInputClass *input_class = GSF_INPUT_CLASS (gobject_class);
/* GObject lifecycle and property hooks. */
491 gobject_class->constructor = gsf_input_gzip_constructor;
492 gobject_class->finalize = gsf_input_gzip_finalize;
493 gobject_class->set_property = gsf_input_gzip_set_property;
494 gobject_class->get_property = gsf_input_gzip_get_property;
/* GsfInput operations implemented by this class. */
495 input_class->Dup = gsf_input_gzip_dup;
496 input_class->Read = gsf_input_gzip_read;
497 input_class->Seek = gsf_input_gzip_seek;
499 g_object_class_install_property
502 g_param_spec_boolean ("raw", "Raw",
503 "Whether to read compressed data with no header and no trailer.",
507 G_PARAM_CONSTRUCT_ONLY));
508 g_object_class_install_property
511 g_param_spec_object ("source", "Source",
512 "Where the compressed data comes from.",
516 G_PARAM_CONSTRUCT_ONLY));
518 * GsfInputGzip:uncompressed_size:
520 * The size that the data will have after uncompression.
521 * The is mandatory for raw streams and if the uncompressed size is
524 g_object_class_install_property
526 PROP_UNCOMPRESSED_SIZE,
527 g_param_spec_int64 ("uncompressed-size", "Size after decompression",
528 "The source's uncompressed size",
532 G_PARAM_CONSTRUCT_ONLY));
/* Cached for chaining up in the constructor and in finalize. */
534 parent_class = g_type_class_peek_parent (gobject_class);
537 GSF_CLASS (GsfInputGZip, gsf_input_gzip,
538 gsf_input_gzip_class_init, gsf_input_gzip_init,