Flush gcharsetconverter when needed.
[platform/upstream/glib.git] / gio / gcharsetconverter.c
1 /* GIO - GLib Input, Output and Streaming Library
2  *
3  * Copyright (C) 2009 Red Hat, Inc.
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General
16  * Public License along with this library; if not, write to the
17  * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
18  * Boston, MA 02111-1307, USA.
19  *
20  * Author: Alexander Larsson <alexl@redhat.com>
21  */
22
23 #include "config.h"
24
25 #include <errno.h>
26
27 #include "gcontenttypeprivate.h"
28 #include "gcharsetconverter.h"
29 #include "glib.h"
30 #include "ginitable.h"
31 #include "gioerror.h"
32 #include "glibintl.h"
33
34 #include "gioalias.h"
35
/* Identifiers for the GObject properties of GCharsetConverter; they are
 * the prop_id values dispatched on in the get/set property handlers. */
enum {
  PROP_0,            /* index 0: no property is installed under this id */
  PROP_FROM_CHARSET, /* "from-charset" */
  PROP_TO_CHARSET,   /* "to-charset" */
  PROP_USE_FALLBACK  /* "use-fallback" */
};
42
43 /**
44  * SECTION:gcharsetconverter
45  * @short_description: Convert between charsets
46  * @include: gio/gio.h
47  *
48  * #GCharsetConverter is an implementation of #GConverter based on
49  * GIConv.
50  */
51
/* Forward declarations: both functions are named in the
 * G_DEFINE_TYPE_WITH_CODE() invocation below, ahead of their definitions
 * further down in this file. */
static void g_charset_converter_iface_init          (GConverterIface *iface);
static void g_charset_converter_initable_iface_init (GInitableIface  *iface);
54
/**
 * GCharsetConverter:
 *
 * Conversions between character sets.
 */
struct _GCharsetConverter
{
  GObject parent_instance;

  /* Source charset name; a private copy made by the "from-charset"
   * property setter and released in finalize. */
  char *from;
  /* Destination charset name; a private copy made by the "to-charset"
   * property setter and released in finalize. */
  char *to;
  /* Conversion descriptor obtained from g_iconv_open() during GInitable
   * initialization. convert() and reset() treat NULL as "not initialized". */
  GIConv iconv;
  /* When TRUE, an invalid input byte is emitted as a backslash followed by
   * two hex digits instead of failing the conversion. */
  gboolean use_fallback;
  /* Number of fallback escapes emitted so far; cleared by reset(). */
  guint n_fallback_errors;
};
70
/* Defines the GCharsetConverter type as a G_TYPE_OBJECT subclass and
 * attaches the GConverter and GInitable interfaces, each set up by its
 * iface_init function defined later in this file.
 * NOTE(review): the ';' after the first G_IMPLEMENT_INTERFACE() is
 * presumably absorbed as an empty statement inside the generated code --
 * confirm against the macro expansion before touching it. */
G_DEFINE_TYPE_WITH_CODE (GCharsetConverter, g_charset_converter, G_TYPE_OBJECT,
                         G_IMPLEMENT_INTERFACE (G_TYPE_CONVERTER,
                                                g_charset_converter_iface_init);
                         G_IMPLEMENT_INTERFACE (G_TYPE_INITABLE,
                                                g_charset_converter_initable_iface_init))
76
77 static void
78 g_charset_converter_finalize (GObject *object)
79 {
80   GCharsetConverter *conv;
81
82   conv = G_CHARSET_CONVERTER (object);
83
84   g_free (conv->from);
85   g_free (conv->to);
86   if (conv->iconv)
87     g_iconv_close (conv->iconv);
88
89   G_OBJECT_CLASS (g_charset_converter_parent_class)->finalize (object);
90 }
91
92 static void
93 g_charset_converter_set_property (GObject      *object,
94                                   guint         prop_id,
95                                   const GValue *value,
96                                   GParamSpec   *pspec)
97 {
98   GCharsetConverter *conv;
99
100   conv = G_CHARSET_CONVERTER (object);
101
102   switch (prop_id)
103     {
104     case PROP_TO_CHARSET:
105       g_free (conv->to);
106       conv->to = g_value_dup_string (value);
107       break;
108
109     case PROP_FROM_CHARSET:
110       g_free (conv->from);
111       conv->from = g_value_dup_string (value);
112       break;
113
114     case PROP_USE_FALLBACK:
115       conv->use_fallback = g_value_get_boolean (value);
116       break;
117
118     default:
119       G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
120       break;
121     }
122
123 }
124
125 static void
126 g_charset_converter_get_property (GObject    *object,
127                                   guint       prop_id,
128                                   GValue     *value,
129                                   GParamSpec *pspec)
130 {
131   GCharsetConverter *conv;
132
133   conv = G_CHARSET_CONVERTER (object);
134
135   switch (prop_id)
136     {
137     case PROP_TO_CHARSET:
138       g_value_set_string (value, conv->to);
139       break;
140
141     case PROP_FROM_CHARSET:
142       g_value_set_string (value, conv->from);
143       break;
144
145     case PROP_USE_FALLBACK:
146       g_value_set_boolean (value, conv->use_fallback);
147       break;
148
149     default:
150       G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
151       break;
152     }
153 }
154
155 static void
156 g_charset_converter_class_init (GCharsetConverterClass *klass)
157 {
158   GObjectClass *gobject_class = G_OBJECT_CLASS (klass);
159
160   gobject_class->finalize = g_charset_converter_finalize;
161   gobject_class->get_property = g_charset_converter_get_property;
162   gobject_class->set_property = g_charset_converter_set_property;
163
164   g_object_class_install_property (gobject_class,
165                                    PROP_TO_CHARSET,
166                                    g_param_spec_string ("to-charset",
167                                                         P_("To Charset"),
168                                                         P_("The character encoding to convert to"),
169                                                         NULL,
170                                                         G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY |
171                                                         G_PARAM_STATIC_STRINGS));
172   g_object_class_install_property (gobject_class,
173                                    PROP_FROM_CHARSET,
174                                    g_param_spec_string ("from-charset",
175                                                         P_("From Charset"),
176                                                         P_("The character encoding to convert from"),
177                                                         NULL,
178                                                         G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY |
179                                                         G_PARAM_STATIC_STRINGS));
180   g_object_class_install_property (gobject_class,
181                                    PROP_USE_FALLBACK,
182                                    g_param_spec_boolean ("use-fallback",
183                                                          P_("Fallback enabled"),
184                                                          P_("Use fallback (of form \\<hexval>) for invalid bytes"),
185                                                          FALSE,
186                                                          G_PARAM_READWRITE |
187                                                          G_PARAM_CONSTRUCT |
188                                                          G_PARAM_STATIC_STRINGS));
189 }
190
/* Instance initializer.  No per-instance setup happens here: the charset
 * names arrive through the construct properties and the iconv descriptor
 * is opened in g_charset_converter_initable_init(). */
static void
g_charset_converter_init (GCharsetConverter *local)
{
}
195
196
197 /**
198  * g_charset_converter_new:
199  * @to_charset: destination charset
200  * @from_charset: source charset
201  * @error: #GError for error reporting, or %NULL to ignore.
202  *
203  * Creates a new #GCharsetConverter.
204  *
205  * Returns: a new #GCharsetConverter or %NULL on error.
206  *
207  * Since: 2.24
208  **/
209 GCharsetConverter *
210 g_charset_converter_new (const gchar  *to_charset,
211                          const gchar  *from_charset,
212                          GError       **error)
213 {
214   GCharsetConverter *conv;
215
216   conv = g_initable_new (G_TYPE_CHARSET_CONVERTER,
217                          NULL, error,
218                          "to-charset", to_charset,
219                          "from-charset", from_charset,
220                          NULL);
221
222   return conv;
223 }
224
225 static void
226 g_charset_converter_reset (GConverter *converter)
227 {
228   GCharsetConverter *conv = G_CHARSET_CONVERTER (converter);
229
230   if (conv->iconv == NULL)
231     {
232       g_warning ("Invalid object, not initialized");
233       return;
234     }
235
236   g_iconv (conv->iconv, NULL, NULL, NULL, NULL);
237   conv->n_fallback_errors = 0;
238 }
239
240 static GConverterResult
241 g_charset_converter_convert (GConverter *converter,
242                              const void *inbuf,
243                              gsize       inbuf_size,
244                              void       *outbuf,
245                              gsize       outbuf_size,
246                              GConverterFlags flags,
247                              gsize      *bytes_read,
248                              gsize      *bytes_written,
249                              GError    **error)
250 {
251   GCharsetConverter  *conv;
252   gsize res;
253   GConverterResult ret;
254   gchar *inbufp, *outbufp;
255   gsize in_left, out_left;
256   int errsv;
257   gboolean reset;
258
259   conv = G_CHARSET_CONVERTER (converter);
260
261   if (conv->iconv == NULL)
262     {
263       g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NOT_INITIALIZED,
264                            _("Invalid object, not initialized"));
265       return G_CONVERTER_ERROR;
266     }
267
268   inbufp = (char *)inbuf;
269   outbufp = (char *)outbuf;
270   in_left = inbuf_size;
271   out_left = outbuf_size;
272   reset = FALSE;
273
274   /* if there is not input try to flush the data */
275   if (inbuf_size == 0)
276     {
277       if (flags & G_CONVERTER_INPUT_AT_END ||
278           flags & G_CONVERTER_FLUSH)
279         {
280           reset = TRUE;
281         }
282       else
283         {
284           g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_PARTIAL_INPUT,
285                                _("Incomplete multibyte sequence in input"));
286           return G_CONVERTER_ERROR;
287         }
288     }
289
290   if (reset)
291     /* call g_iconv with NULL inbuf to cleanup shift state */
292     res = g_iconv (conv->iconv,
293                    NULL, &in_left,
294                    &outbufp, &out_left);
295   else
296     res = g_iconv (conv->iconv,
297                    &inbufp, &in_left,
298                    &outbufp, &out_left);
299
300   *bytes_read = inbufp - (char *)inbuf;
301   *bytes_written = outbufp - (char *)outbuf;
302
303   /* Don't report error if we converted anything */
304   if (res == (gsize) -1 && *bytes_read == 0)
305     {
306       errsv = errno;
307
308       switch (errsv)
309         {
310         case EINVAL:
311           /* Incomplete input text */
312           g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_PARTIAL_INPUT,
313                                _("Incomplete multibyte sequence in input"));
314           break;
315
316         case E2BIG:
317           /* Not enough destination space */
318           g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NO_SPACE,
319                                _("Not enough space in destination"));
320           break;
321
322         case EILSEQ:
323           /* Invalid code sequence */
324           if (conv->use_fallback)
325             {
326               if (outbuf_size < 3)
327                 g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NO_SPACE,
328                                      _("Not enough space in destination"));
329               else
330                 {
331                   const char hex[] = "0123456789ABCDEF";
332                   guint8 v = *(guint8 *)inbuf;
333                   guint8 *out = (guint8 *)outbuf;
334                   out[0] = '\\';
335                   out[1] = hex[(v & 0xf0) >> 4];
336                   out[2] = hex[(v & 0x0f) >> 0];
337                   *bytes_read = 1;
338                   *bytes_written = 3;
339                   in_left--;
340                   conv->n_fallback_errors++;
341                   goto ok;
342                 }
343             }
344           else
345             g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_INVALID_DATA,
346                                  _("Invalid byte sequence in conversion input"));
347           break;
348
349         default:
350           g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
351                        _("Error during conversion: %s"),
352                        g_strerror (errsv));
353           break;
354         }
355       ret = G_CONVERTER_ERROR;
356     }
357   else
358     {
359     ok:
360       ret = G_CONVERTER_CONVERTED;
361
362       if (reset &&
363           (flags & G_CONVERTER_INPUT_AT_END))
364         ret = G_CONVERTER_FINISHED;
365       else if (reset &&
366                (flags & G_CONVERTER_FLUSH))
367         ret = G_CONVERTER_FLUSHED;
368     }
369
370   return ret;
371 }
372
373 /**
374  * g_charset_converter_set_use_fallback:
375  * @converter: a #GCharsetConverter
376  * @use_fallback: %TRUE to use fallbacks
377  *
378  * Sets the #GCharsetConverter:use-fallback property.
379  *
380  * Since: 2.24
381  */
382 void
383 g_charset_converter_set_use_fallback (GCharsetConverter *converter,
384                                       gboolean           use_fallback)
385 {
386   use_fallback = !!use_fallback;
387
388   if (converter->use_fallback != use_fallback)
389     {
390       converter->use_fallback = use_fallback;
391       g_object_notify (G_OBJECT (converter), "use-fallback");
392     }
393 }
394
395 /**
396  * g_charset_converter_get_use_fallback:
397  * @converter: a #GCharsetConverter
398  *
399  * Gets the #GCharsetConverter:use-fallback property.
400  *
401  * Returns: %TRUE if fallbacks are used by @converter
402  *
403  * Since: 2.24
404  */
405 gboolean
406 g_charset_converter_get_use_fallback (GCharsetConverter *converter)
407 {
408   return converter->use_fallback;
409 }
410
411 /**
412  * g_charset_converter_get_num_fallbacks:
413  * @converter: a #GCharsetConverter
414  *
415  * Gets the number of fallbacks that @converter has applied so far.
416  *
417  * Returns: the number of fallbacks that @converter has applied
418  *
419  * Since: 2.24
420  */
421 guint
422 g_charset_converter_get_num_fallbacks (GCharsetConverter *converter)
423 {
424   return converter->n_fallback_errors;
425 }
426
427 static void
428 g_charset_converter_iface_init (GConverterIface *iface)
429 {
430   iface->convert = g_charset_converter_convert;
431   iface->reset = g_charset_converter_reset;
432 }
433
434 static gboolean
435 g_charset_converter_initable_init (GInitable *initable,
436                                    GCancellable *cancellable,
437                                    GError  **error)
438 {
439   GCharsetConverter  *conv;
440
441   g_return_val_if_fail (G_IS_CHARSET_CONVERTER (initable), FALSE);
442
443   conv = G_CHARSET_CONVERTER (initable);
444
445   if (cancellable != NULL)
446     {
447       g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED,
448                            _("Cancellable initialization not supported"));
449       return FALSE;
450     }
451
452   conv->iconv =
453     g_iconv_open (conv->to, conv->from);
454
455   if (conv->iconv == NULL)
456     {
457       if (errno == EINVAL)
458         g_set_error (error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED,
459                      _("Conversion from character set '%s' to '%s' is not supported"),
460                      conv->from, conv->to);
461       else
462         g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
463                      _("Could not open converter from '%s' to '%s'"),
464                      conv->from, conv->to);
465       return FALSE;
466     }
467
468   return TRUE;
469 }
470
/* Fills in the GInitable interface vtable with this type's init
 * implementation. */
static void
g_charset_converter_initable_iface_init (GInitableIface *iface)
{
  iface->init = g_charset_converter_initable_init;
}
476
477 #define __G_CHARSET_CONVERTER_C__
478 #include "gioaliasdef.c"