Add fallback mode to GCharsetConverter
[platform/upstream/glib.git] / gio / gcharsetconverter.c
1 /* GIO - GLib Input, Output and Streaming Library
2  *
3  * Copyright (C) 2009 Red Hat, Inc.
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General
16  * Public License along with this library; if not, write to the
17  * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
18  * Boston, MA 02111-1307, USA.
19  *
20  * Author: Alexander Larsson <alexl@redhat.com>
21  */
22
23 #include "config.h"
24
25 #include <errno.h>
26
27 #include "gcontenttypeprivate.h"
28 #include "gcharsetconverter.h"
29 #include "glib.h"
30 #include "ginitable.h"
31 #include "gioerror.h"
32 #include "glibintl.h"
33
34 #include "gioalias.h"
35
/* GObject property identifiers; 0 is reserved and never installed. */
enum
{
  PROP_0            = 0,
  PROP_FROM_CHARSET = 1,  /* "from-charset" */
  PROP_TO_CHARSET   = 2,  /* "to-charset" */
  PROP_USE_FALLBACK = 3   /* "use-fallback" */
};
42
43 /**
44  * SECTION:gcharsetconverter
45  * @short_description: Convert between charsets
46  * @include: gio/gio.h
47  *
48  * #GCharsetConverter is an implementation of #GConverter based on
49  * GIConv.
50  */
51
52 static void g_charset_converter_iface_init          (GConverterIface *iface);
53 static void g_charset_converter_initable_iface_init (GInitableIface  *iface);
54
55 /**
56  * GCharsetConverter:
57  *
58  * Conversions between character sets.
59  */
60 struct _GCharsetConverter
61 {
62   GObject parent_instance;
63
64   char *from;
65   char *to;
66   GIConv iconv;
67   gboolean use_fallback;
68   guint n_fallback_errors;
69 };
70
71 G_DEFINE_TYPE_WITH_CODE (GCharsetConverter, g_charset_converter, G_TYPE_OBJECT,
72                          G_IMPLEMENT_INTERFACE (G_TYPE_CONVERTER,
73                                                 g_charset_converter_iface_init);
74                          G_IMPLEMENT_INTERFACE (G_TYPE_INITABLE,
75                                                 g_charset_converter_initable_iface_init))
76
77 static void
78 g_charset_converter_finalize (GObject *object)
79 {
80   GCharsetConverter *conv;
81
82   conv = G_CHARSET_CONVERTER (object);
83
84   g_free (conv->from);
85   g_free (conv->to);
86   if (conv->iconv)
87     g_iconv_close (conv->iconv);
88
89   G_OBJECT_CLASS (g_charset_converter_parent_class)->finalize (object);
90 }
91
92 static void
93 g_charset_converter_set_property (GObject      *object,
94                                   guint         prop_id,
95                                   const GValue *value,
96                                   GParamSpec   *pspec)
97 {
98   GCharsetConverter *conv;
99
100   conv = G_CHARSET_CONVERTER (object);
101
102   switch (prop_id)
103     {
104     case PROP_TO_CHARSET:
105       g_free (conv->to);
106       conv->to = g_value_dup_string (value);
107       break;
108
109     case PROP_FROM_CHARSET:
110       g_free (conv->from);
111       conv->from = g_value_dup_string (value);
112       break;
113
114     case PROP_USE_FALLBACK:
115       conv->use_fallback = g_value_get_boolean (value);
116       break;
117
118     default:
119       G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
120       break;
121     }
122
123 }
124
125 static void
126 g_charset_converter_get_property (GObject    *object,
127                                   guint       prop_id,
128                                   GValue     *value,
129                                   GParamSpec *pspec)
130 {
131   GCharsetConverter *conv;
132
133   conv = G_CHARSET_CONVERTER (object);
134
135   switch (prop_id)
136     {
137     case PROP_TO_CHARSET:
138       g_value_set_string (value, conv->to);
139       break;
140
141     case PROP_FROM_CHARSET:
142       g_value_set_string (value, conv->from);
143       break;
144
145     case PROP_USE_FALLBACK:
146       g_value_set_boolean (value, conv->use_fallback);
147       break;
148
149     default:
150       G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
151       break;
152     }
153 }
154
155 static void
156 g_charset_converter_class_init (GCharsetConverterClass *klass)
157 {
158   GObjectClass *gobject_class = G_OBJECT_CLASS (klass);
159
160   gobject_class->finalize = g_charset_converter_finalize;
161   gobject_class->get_property = g_charset_converter_get_property;
162   gobject_class->set_property = g_charset_converter_set_property;
163
164   g_object_class_install_property (gobject_class,
165                                    PROP_TO_CHARSET,
166                                    g_param_spec_string ("to-charset",
167                                                         P_("To Charset"),
168                                                         P_("The character encoding to convert to"),
169                                                         NULL,
170                                                         G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY |
171                                                         G_PARAM_STATIC_STRINGS));
172   g_object_class_install_property (gobject_class,
173                                    PROP_FROM_CHARSET,
174                                    g_param_spec_string ("from-charset",
175                                                         P_("From Charset"),
176                                                         P_("The character encoding to convert from"),
177                                                         NULL,
178                                                         G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY |
179                                                         G_PARAM_STATIC_STRINGS));
180   g_object_class_install_property (gobject_class,
181                                    PROP_USE_FALLBACK,
182                                    g_param_spec_boolean ("use-fallback",
183                                                          P_("Fallback enabled"),
184                                                          P_("Use fallback (of form \\<hexval>) for invalid bytes"),
185                                                          FALSE,
186                                                          G_PARAM_READWRITE |
187                                                          G_PARAM_CONSTRUCT |
188                                                          G_PARAM_STATIC_STRINGS));
189 }
190
191 static void
192 g_charset_converter_init (GCharsetConverter *local)
193 {
194 }
195
196
197 /**
198  * g_charset_converter_new:
199  * @to_charset: destination charset
200  * @from_charset: source charset
201  * @error: #GError for error reporting, or %NULL to ignore.
202  *
203  * Creates a new #GCharsetConverter.
204  *
205  * Returns: a new #GCharsetConverter or %NULL on error.
206  *
207  * Since: 2.24
208  **/
209 GCharsetConverter *
210 g_charset_converter_new (const gchar  *to_charset,
211                          const gchar  *from_charset,
212                          GError       **error)
213 {
214   GCharsetConverter *conv;
215
216   conv = g_initable_new (G_TYPE_CHARSET_CONVERTER,
217                          NULL, error,
218                          "to-charset", to_charset,
219                          "from-charset", from_charset,
220                          NULL);
221
222   return conv;
223 }
224
225 static void
226 g_charset_converter_reset (GConverter *converter)
227 {
228   GCharsetConverter *conv = G_CHARSET_CONVERTER (converter);
229
230   if (conv->iconv == NULL)
231     {
232       g_warning ("Invalid object, not initialized");
233       return;
234     }
235
236   g_iconv (conv->iconv, NULL, NULL, NULL, NULL);
237   conv->n_fallback_errors = 0;
238 }
239
240 static GConverterResult
241 g_charset_converter_convert (GConverter *converter,
242                              const void *inbuf,
243                              gsize       inbuf_size,
244                              void       *outbuf,
245                              gsize       outbuf_size,
246                              GConverterFlags flags,
247                              gsize      *bytes_read,
248                              gsize      *bytes_written,
249                              GError    **error)
250 {
251   GCharsetConverter  *conv;
252   gsize res;
253   GConverterResult ret;
254   gchar *inbufp, *outbufp;
255   gsize in_left, out_left;
256   int errsv;
257
258   conv = G_CHARSET_CONVERTER (converter);
259
260   if (conv->iconv == NULL)
261     {
262       g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NOT_INITIALIZED,
263                            _("Invalid object, not initialized"));
264       return G_CONVERTER_ERROR;
265     }
266
267   /* Iconv never produces output with no input, so handle this
268      specially */
269   if (inbuf_size == 0)
270     {
271       if (flags & G_CONVERTER_INPUT_AT_END)
272         return G_CONVERTER_FINISHED;
273
274       if (flags & G_CONVERTER_FLUSH)
275         return G_CONVERTER_FLUSHED;
276
277       g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_PARTIAL_INPUT,
278                            _("Incomplete multibyte sequence in input"));
279       return G_CONVERTER_ERROR;
280     }
281
282   inbufp = (char *)inbuf;
283   outbufp = (char *)outbuf;
284   in_left = inbuf_size;
285   out_left = outbuf_size;
286
287   res = g_iconv (conv->iconv,
288                  &inbufp, &in_left,
289                  &outbufp, &out_left);
290
291   *bytes_read = inbufp - (char *)inbuf;
292   *bytes_written = outbufp - (char *)outbuf;
293
294   /* Don't report error if we converted anything */
295   if (res == (gsize) -1 && *bytes_read == 0)
296     {
297       errsv = errno;
298
299       switch (errsv)
300         {
301         case EINVAL:
302           /* Incomplete input text */
303           g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_PARTIAL_INPUT,
304                                _("Incomplete multibyte sequence in input"));
305           break;
306
307         case E2BIG:
308           /* Not enough destination space */
309           g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NO_SPACE,
310                                _("Not enough space in destination"));
311           break;
312
313         case EILSEQ:
314           /* Invalid code sequence */
315           if (conv->use_fallback)
316             {
317               if (outbuf_size < 3)
318                 g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NO_SPACE,
319                                      _("Not enough space in destination"));
320               else
321                 {
322                   const char hex[] = "0123456789ABCDEF";
323                   guint8 v = *(guint8 *)inbuf;
324                   guint8 *out = (guint8 *)outbuf;
325                   out[0] = '\\';
326                   out[1] = hex[(v & 0xf0) >> 4];
327                   out[2] = hex[(v & 0x0f) >> 0];
328                   *bytes_read = 1;
329                   *bytes_written = 3;
330                   in_left--;
331                   conv->n_fallback_errors++;
332                   goto ok;
333                 }
334             }
335           else
336             g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_INVALID_DATA,
337                                  _("Invalid byte sequence in conversion input"));
338           break;
339
340         default:
341           g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
342                        _("Error during conversion: %s"),
343                        g_strerror (errsv));
344           break;
345         }
346       ret = G_CONVERTER_ERROR;
347     }
348   else
349     {
350     ok:
351       ret = G_CONVERTER_CONVERTED;
352
353       if (in_left == 0 &&
354           (flags & G_CONVERTER_INPUT_AT_END))
355         ret = G_CONVERTER_FINISHED;
356       else if (in_left == 0 &&
357                (flags & G_CONVERTER_FLUSH))
358         ret = G_CONVERTER_FLUSHED;
359     }
360
361   return ret;
362 }
363
364 void
365 g_charset_converter_set_use_fallback (GCharsetConverter *converter,
366                                       gboolean use_fallback)
367 {
368   use_fallback = !!use_fallback;
369
370   if (converter->use_fallback != use_fallback)
371     {
372       converter->use_fallback = use_fallback;
373       g_object_notify (G_OBJECT (converter), "use-fallback");
374     }
375 }
376
377 gboolean
378 g_charset_converter_get_use_fallback (GCharsetConverter *converter)
379 {
380   return converter->use_fallback;
381 }
382
383 guint
384 g_charset_converter_get_num_fallbacks (GCharsetConverter *converter)
385 {
386   return converter->n_fallback_errors;
387 }
388
389 static void
390 g_charset_converter_iface_init (GConverterIface *iface)
391 {
392   iface->convert = g_charset_converter_convert;
393   iface->reset = g_charset_converter_reset;
394 }
395
396 static gboolean
397 g_charset_converter_initable_init (GInitable *initable,
398                                    GCancellable *cancellable,
399                                    GError  **error)
400 {
401   GCharsetConverter  *conv;
402
403   g_return_val_if_fail (G_IS_CHARSET_CONVERTER (initable), FALSE);
404
405   conv = G_CHARSET_CONVERTER (initable);
406
407   if (cancellable != NULL)
408     {
409       g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED,
410                            _("Cancellable initialization not supported"));
411       return FALSE;
412     }
413
414   conv->iconv =
415     g_iconv_open (conv->to, conv->from);
416
417   if (conv->iconv == NULL)
418     {
419       if (errno == EINVAL)
420         g_set_error (error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED,
421                      _("Conversion from character set '%s' to '%s' is not supported"),
422                      conv->from, conv->to);
423       else
424         g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
425                      _("Could not open converter from '%s' to '%s'"),
426                      conv->from, conv->to);
427       return FALSE;
428     }
429
430   return TRUE;
431 }
432
433 static void
434 g_charset_converter_initable_iface_init (GInitableIface *iface)
435 {
436   iface->init = g_charset_converter_initable_init;
437 }
438
439 #define __G_CHARSET_CONVERTER_C__
440 #include "gioaliasdef.c"