"Initial commit to Gerrit"
[profile/ivi/libgsf.git] / gsf / gsf-input-gzip.c
1 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*
3  * gsf-input-gzip.c: wrapper to uncompress gzipped input
4  *
5  * Copyright (C) 2002-2006 Jody Goldberg (jody@gnome.org)
6  * Copyright (C) 2005-2006 Morten Welinder (terra@gnome.org)
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of version 2.1 of the GNU Lesser General Public
10  * License as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
20  * USA
21  */
22
23 #include <gsf-config.h>
24 #include <gsf/gsf-input-gzip.h>
25 #include <gsf/gsf-input-impl.h>
26 #include <gsf/gsf-impl-utils.h>
27 #include <gsf/gsf-utils.h>
28
29 #include <zlib.h>
30 #include <stdio.h>
31 #include <string.h>
32
/* Largest number of compressed bytes pulled from the source per read. */
#define Z_BUFSIZE	0x100
34
35 static GObjectClass *parent_class;
36
37 struct _GsfInputGZip {
38         GsfInput input;
39
40         GsfInput *source; /* compressed data */
41         gboolean raw; /* No header and no trailer.  */
42         GError *err;
43         gsf_off_t uncompressed_size;
44         gboolean stop_byte_added;
45
46         z_stream  stream;
47         guint8 const *gzipped_data;
48         uLong     crc;     /* crc32 of uncompressed data */
49
50         guint8   *buf;
51         size_t    buf_size;
52
53         gsf_off_t header_size, trailer_size;
54         gsf_off_t seek_skipped;
55 };
56
57 typedef struct {
58         GsfInputClass input_class;
59 } GsfInputGZipClass;
60
/* Property identifiers used by get_property/set_property and class_init. */
enum {
	PROP_0 = 0,
	PROP_RAW = 1,
	PROP_SOURCE = 2,
	PROP_UNCOMPRESSED_SIZE = 3
};
67
/* Bits of the gzip header flag byte */
#define GZIP_IS_ASCII		0x01 /* file contains text ? */
#define GZIP_HEADER_CRC		0x02 /* there is a CRC in the header */
#define GZIP_EXTRA_FIELD	0x04 /* there is an 'extra' field */
#define GZIP_ORIGINAL_NAME	0x08 /* the original is stored */
#define GZIP_HAS_COMMENT	0x10 /* There is a comment in the header */
/* Every flag bit understood here; any other bit makes check_header fail. */
#define GZIP_HEADER_FLAGS					\
	((unsigned)(GZIP_IS_ASCII | GZIP_HEADER_CRC |		\
		    GZIP_EXTRA_FIELD | GZIP_ORIGINAL_NAME |	\
		    GZIP_HAS_COMMENT))
75
76 static gboolean
77 check_header (GsfInputGZip *input)
78 {
79         if (input->raw) {
80                 input->header_size = 0;
81                 input->trailer_size = 0;
82         } else {
83                 static guint8 const signature[2] = {0x1f, 0x8b};
84                 guint8 const *data;
85                 unsigned flags, len;
86
87                 /* Check signature */
88                 if (NULL == (data = gsf_input_read (input->source, 2 + 1 + 1 + 6, NULL)) ||
89                     0 != memcmp (data, signature, sizeof (signature)))
90                         return TRUE;
91
92                 /* verify flags and compression type */
93                 flags  = data[3];
94                 if (data[2] != Z_DEFLATED || (flags & ~GZIP_HEADER_FLAGS) != 0)
95                         return TRUE;
96
97                 /* If we have the size, don't bother seeking to the end.  */
98                 if (input->uncompressed_size < 0) {
99                         /* Get the uncompressed size */
100                         if (gsf_input_seek (input->source, (gsf_off_t) -4, G_SEEK_END) ||
101                             NULL == (data = gsf_input_read (input->source, 4, NULL)))
102                                 return TRUE;
103                         /* FIXME, but how?  The size read here is modulo 2^32.  */
104                         input->uncompressed_size = GSF_LE_GET_GUINT32 (data);
105
106                         if (input->uncompressed_size / 1000 > gsf_input_size (input->source)) {
107                                 g_warning ("Suspiciously well compressed file with better than 1000:1 ratio.\n"
108                                            "It is probably truncated or corrupt");
109                         }
110                 }
111
112                 if (gsf_input_seek (input->source, 2 + 1 + 1 + 6, G_SEEK_SET))
113                         return TRUE;
114
115                 if (flags & GZIP_EXTRA_FIELD) {
116                         if (NULL == (data = gsf_input_read (input->source, 2, NULL)))
117                                 return TRUE;
118                         len = GSF_LE_GET_GUINT16 (data);
119                         if (NULL == gsf_input_read (input->source, len, NULL))
120                                 return TRUE;
121                 }
122                 if (flags & GZIP_ORIGINAL_NAME) {
123                         /* Skip over the filename (which is in ISO 8859-1 encoding).  */
124                         do {
125                                 if (NULL == (data = gsf_input_read (input->source, 1, NULL)))
126                                         return TRUE;
127                         } while (*data != 0);
128                 }
129
130                 if (flags & GZIP_HAS_COMMENT) {
131                         /* Skip over the comment (which is in ISO 8859-1 encoding).  */
132                         do {
133                                 if (NULL == (data = gsf_input_read (input->source, 1, NULL)))
134                                         return TRUE;
135                         } while (*data != 0);
136                 }
137
138                 if (flags & GZIP_HEADER_CRC &&
139                     NULL == (data = gsf_input_read (input->source, 2, NULL)))
140                         return TRUE;
141
142                 input->header_size = input->source->cur_offset;
143                 /* the last 8 bytes are the crc and size.  */
144                 input->trailer_size = 8;
145         }
146
147         gsf_input_set_size (GSF_INPUT (input), input->uncompressed_size);
148
149         if (gsf_input_remaining (input->source) < input->trailer_size)
150                 return TRUE;    /* No room for payload */
151
152         return FALSE;
153 }
154
155 static gboolean
156 init_zip (GsfInputGZip *gzip, GError **err)
157 {
158         gsf_off_t cur_pos;
159
160         if (Z_OK != inflateInit2 (&(gzip->stream), -MAX_WBITS)) {
161                 if (err != NULL)
162                         *err = g_error_new (gsf_input_error_id (), 0,
163                                 "Unable to initialize zlib");
164                 return TRUE;
165         }
166
167         cur_pos = gsf_input_tell (gzip->source);
168         if (gsf_input_seek (gzip->source, 0, G_SEEK_SET)) {
169                 if (err)
170                         *err = g_error_new (gsf_input_error_id (), 0,
171                                             "Failed to rewind source");
172                 return TRUE;
173         }
174
175         if (check_header (gzip) != FALSE) {
176                 if (err != NULL)
177                         *err = g_error_new (gsf_input_error_id (), 0,
178                                 "Invalid gzip header");
179                 if (gsf_input_seek (gzip->source, cur_pos, G_SEEK_SET)) {
180                         g_warning ("attempt to restore position failed ??");
181                 }
182                 return TRUE;
183         }
184
185         return FALSE;
186 }
187
188 /**
189  * gsf_input_gzip_new :
190  * @source : The underlying data source.
191  * @err    : optionally %NULL.
192  *
193  * Adds a reference to @source.
194  *
195  * Returns: a new file or %NULL.
196  **/
197 GsfInput *
198 gsf_input_gzip_new (GsfInput *source, GError **err)
199 {
200         GsfInputGZip *gzip;
201
202         g_return_val_if_fail (GSF_IS_INPUT (source), NULL);
203
204         gzip = g_object_new (GSF_INPUT_GZIP_TYPE,
205                              "source", source,
206                              NULL);
207         if (G_UNLIKELY (NULL == gzip)) return NULL;
208
209         if (gzip->err) {
210                 if (err)
211                         *err = g_error_copy (gzip->err);
212                 g_object_unref (gzip);
213                 return NULL;
214         }
215         gsf_input_set_name (GSF_INPUT (gzip), gsf_input_name (source));
216
217         return GSF_INPUT (gzip);
218 }
219
220 static void
221 gsf_input_gzip_finalize (GObject *obj)
222 {
223         GsfInputGZip *gzip = (GsfInputGZip *)obj;
224
225         if (gzip->source != NULL) {
226                 g_object_unref (G_OBJECT (gzip->source));
227                 gzip->source = NULL;
228         }
229
230         g_free (gzip->buf);
231
232         if (gzip->stream.state != NULL)
233                 inflateEnd (&(gzip->stream));
234
235         g_clear_error (&gzip->err);
236
237         parent_class->finalize (obj);
238 }
239
240 static GsfInput *
241 gsf_input_gzip_dup (GsfInput *src_input, GError **err)
242 {
243         GsfInputGZip const *src = (GsfInputGZip *)src_input;
244         GsfInputGZip *dst;
245         GsfInput *src_source_copy;
246
247         if (src->source) {
248                 src_source_copy = gsf_input_dup (src->source, err);
249                 if (err)
250                         return NULL;
251         } else
252                 src_source_copy = NULL;
253
254         dst = g_object_new (GSF_INPUT_GZIP_TYPE,
255                             "source", src_source_copy,
256                             "raw", src->raw,
257                             NULL);
258
259         if (src_source_copy)
260                 g_object_unref (src_source_copy);
261
262         if (G_UNLIKELY (NULL == dst))
263                 return NULL;
264
265         if (src->err) {
266                 g_clear_error (&dst->err);
267                 dst->err = g_error_copy (src->err);
268         } else if (dst->err) {
269                 if (err)
270                         *err = g_error_copy (dst->err);
271                 g_object_unref (dst);
272                 return NULL;
273         }
274
275         return GSF_INPUT (dst);
276 }
277
278 static guint8 const *
279 gsf_input_gzip_read (GsfInput *input, size_t num_bytes, guint8 *buffer)
280 {
281         GsfInputGZip *gzip = GSF_INPUT_GZIP (input);
282
283         if (buffer == NULL) {
284                 if (gzip->buf_size < num_bytes) {
285                         gzip->buf_size = MAX (num_bytes, 256);
286                         g_free (gzip->buf);
287                         gzip->buf = g_new (guint8, gzip->buf_size);
288                 }
289                 buffer = gzip->buf;
290         }
291
292         gzip->stream.next_out = buffer;
293         gzip->stream.avail_out = num_bytes;
294         while (gzip->stream.avail_out != 0) {
295                 int zerr;
296                 if (gzip->stream.avail_in == 0) {
297                         gsf_off_t remain = gsf_input_remaining (gzip->source);
298                         if (remain <= gzip->trailer_size) {
299                                 if (remain < gzip->trailer_size || gzip->stop_byte_added) {
300                                         g_clear_error (&gzip->err);
301                                         gzip->err = g_error_new
302                                                 (gsf_input_error_id (), 0,
303                                                  "truncated source");
304                                         return NULL;
305                                 }
306                                 /* zlib requires an extra byte.  */
307                                 gzip->stream.avail_in = 1;
308                                 gzip->gzipped_data = "";
309                                 gzip->stop_byte_added = TRUE;
310                         } else {
311                                 size_t n = MIN (remain - gzip->trailer_size,
312                                                 Z_BUFSIZE);
313
314                                 gzip->gzipped_data =
315                                         gsf_input_read (gzip->source, n, NULL);
316                                 if (!gzip->gzipped_data) {
317                                         g_clear_error (&gzip->err);
318                                         gzip->err = g_error_new
319                                                 (gsf_input_error_id (), 0,
320                                                  "Failed to read from source");
321                                         return NULL;
322                                 }
323                                 gzip->stream.avail_in = n;
324                         }
325                         gzip->stream.next_in = (Byte *)gzip->gzipped_data;
326                 }
327                 zerr = inflate (&(gzip->stream), Z_NO_FLUSH);
328                 if (zerr != Z_OK) {
329                         if (zerr != Z_STREAM_END)
330                                 return NULL;
331                         /* Premature end of stream.  */
332                         if (gzip->stream.avail_out != 0)
333                                 return NULL;
334                 }
335         }
336
337         gzip->crc = crc32 (gzip->crc, buffer, (uInt)(gzip->stream.next_out - buffer));
338         return buffer;
339 }
340
341 static gboolean
342 gsf_input_gzip_seek (GsfInput *input, gsf_off_t offset, GSeekType whence)
343 {
344         GsfInputGZip *gzip = GSF_INPUT_GZIP (input);
345         /* Global flag -- we don't want one per stream.  */
346         static gboolean warned = FALSE;
347         gsf_off_t pos = offset;
348
349         /* Note, that pos has already been sanity checked.  */
350         switch (whence) {
351         case G_SEEK_SET : break;
352         case G_SEEK_CUR : pos += input->cur_offset;     break;
353         case G_SEEK_END : pos += input->size;           break;
354         default : return TRUE;
355         }
356
357         if (pos < input->cur_offset) {
358                 if (gsf_input_seek (gzip->source, gzip->header_size, G_SEEK_SET))
359                         return TRUE;
360                 gzip->crc = crc32 (0L, Z_NULL, 0);
361                 gzip->stream.avail_in = 0;
362                 if (inflateReset (&(gzip->stream)) != Z_OK)
363                         return TRUE;
364                 input->cur_offset = 0;
365         }
366
367         if (gsf_input_seek_emulate (input, pos))
368                 return TRUE;
369
370         gzip->seek_skipped += pos;
371         if (!warned &&
372             gzip->seek_skipped != pos && /* Don't warn for single seek.  */
373             gzip->seek_skipped >= 1000000) {
374                 warned = TRUE;
375                 g_warning ("Seeking in gzipped streams is awfully slow.");
376         }
377
378         return FALSE;
379 }
380
381 static void
382 gsf_input_gzip_init (GObject *obj)
383 {
384         GsfInputGZip *gzip = GSF_INPUT_GZIP (obj);
385
386         gzip->source = NULL;
387         gzip->raw = FALSE;
388         gzip->uncompressed_size = -1;
389         gzip->err = NULL;
390         gzip->stream.zalloc     = (alloc_func)0;
391         gzip->stream.zfree      = (free_func)0;
392         gzip->stream.opaque     = (voidpf)0;
393         gzip->stream.next_in    = Z_NULL;
394         gzip->stream.next_out   = Z_NULL;
395         gzip->stream.avail_in   = gzip->stream.avail_out = 0;
396         gzip->crc               = crc32 (0L, Z_NULL, 0);
397         gzip->buf               = NULL;
398         gzip->buf_size          = 0;
399         gzip->seek_skipped = 0;
400 }
401
402 static void
403 gsf_input_gzip_get_property (GObject     *object,
404                              guint        property_id,
405                              GValue      *value,
406                              GParamSpec  *pspec)
407 {
408         GsfInputGZip *gzip = (GsfInputGZip *)object;
409
410         switch (property_id) {
411         case PROP_RAW:
412                 g_value_set_boolean (value, gzip->raw);
413                 break;
414         case PROP_SOURCE:
415                 g_value_set_object (value, gzip->source);
416                 break;
417         case PROP_UNCOMPRESSED_SIZE:
418                 g_value_set_int64 (value, gzip->uncompressed_size);
419                 break;
420         default:
421                 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
422                 break;
423         }
424 }
425
426 static void
427 gsf_input_gzip_set_source (GsfInputGZip *gzip, GsfInput *source)
428 {
429         if (source)
430                 g_object_ref (GSF_INPUT (source));
431         if (gzip->source)
432                 g_object_unref (gzip->source);
433         gzip->source = source;
434 }
435
436 static void
437 gsf_input_gzip_set_property (GObject      *object,
438                              guint         property_id,
439                              GValue const *value,
440                              GParamSpec   *pspec)
441 {
442         GsfInputGZip *gzip = (GsfInputGZip *)object;
443
444         switch (property_id) {
445         case PROP_RAW:
446                 gzip->raw = g_value_get_boolean (value);
447                 break;
448         case PROP_SOURCE:
449                 gsf_input_gzip_set_source (gzip, g_value_get_object (value));
450                 break;
451         case PROP_UNCOMPRESSED_SIZE:
452                 gzip->uncompressed_size = g_value_get_int64 (value);
453                 break;
454         default:
455                 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
456                 break;
457         }
458 }
459
460 static GObject*
461 gsf_input_gzip_constructor (GType                  type,
462                             guint                  n_construct_properties,
463                             GObjectConstructParam *construct_params)
464 {
465   GsfInputGZip *gzip;
466
467   gzip = (GsfInputGZip *)(parent_class->constructor (type,
468                                                      n_construct_properties,
469                                                      construct_params));
470
471   if (!gzip->source) {
472           g_clear_error (&gzip->err);
473           gzip->err = g_error_new (gsf_input_error_id (), 0,
474                                    "NULL source");
475   } else if (gzip->raw && gzip->uncompressed_size < 0) {
476           g_clear_error (&gzip->err);
477           gzip->err = g_error_new (gsf_input_error_id (), 0,
478                                    "Uncompressed size not set");
479   } else if (init_zip (gzip, &gzip->err) != FALSE) {
480           /* Nothing more.  */
481   }
482
483   return (GObject *)gzip;
484 }
485
486 static void
487 gsf_input_gzip_class_init (GObjectClass *gobject_class)
488 {
489         GsfInputClass *input_class = GSF_INPUT_CLASS (gobject_class);
490
491         gobject_class->constructor  = gsf_input_gzip_constructor;
492         gobject_class->finalize     = gsf_input_gzip_finalize;
493         gobject_class->set_property = gsf_input_gzip_set_property;
494         gobject_class->get_property = gsf_input_gzip_get_property;
495         input_class->Dup            = gsf_input_gzip_dup;
496         input_class->Read           = gsf_input_gzip_read;
497         input_class->Seek           = gsf_input_gzip_seek;
498
499         g_object_class_install_property
500                 (gobject_class,
501                  PROP_RAW,
502                  g_param_spec_boolean ("raw", "Raw",
503                                        "Whether to read compressed data with no header and no trailer.",
504                                        FALSE,
505                                        GSF_PARAM_STATIC |
506                                        G_PARAM_READWRITE |
507                                        G_PARAM_CONSTRUCT_ONLY));
508         g_object_class_install_property
509                 (gobject_class,
510                  PROP_SOURCE,
511                  g_param_spec_object ("source", "Source",
512                                       "Where the compressed data comes from.",
513                                       GSF_INPUT_TYPE,
514                                       GSF_PARAM_STATIC |
515                                       G_PARAM_READWRITE |
516                                       G_PARAM_CONSTRUCT_ONLY));
517         /**
518          * GsfInputGzip:uncompressed_size:
519          *
520          * The size that the data will have after uncompression.
521          * The is mandatory for raw streams and if the uncompressed size is
522          * larger than 4GB.
523          */  
524         g_object_class_install_property
525                 (gobject_class,
526                  PROP_UNCOMPRESSED_SIZE,
527                  g_param_spec_int64 ("uncompressed-size", "Size after decompression",
528                                      "The source's uncompressed size",
529                                      -1, G_MAXINT64, -1,
530                                      GSF_PARAM_STATIC |
531                                      G_PARAM_READWRITE |
532                                      G_PARAM_CONSTRUCT_ONLY));
533
534         parent_class = g_type_class_peek_parent (gobject_class);
535 }
536
537 GSF_CLASS (GsfInputGZip, gsf_input_gzip,
538            gsf_input_gzip_class_init, gsf_input_gzip_init,
539            GSF_INPUT_TYPE)
540