Merge remote-tracking branch 'gvdb/master'
[platform/upstream/glib.git] / gio / gcharsetconverter.c
1 /* GIO - GLib Input, Output and Streaming Library
2  *
3  * Copyright (C) 2009 Red Hat, Inc.
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General
16  * Public License along with this library; if not, write to the
17  * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
18  * Boston, MA 02111-1307, USA.
19  *
20  * Author: Alexander Larsson <alexl@redhat.com>
21  */
22
23 #include "config.h"
24
25 #include "gcharsetconverter.h"
26
27 #include <errno.h>
28
29 #include "gcontenttypeprivate.h"
30 #include "ginitable.h"
31 #include "gioerror.h"
32 #include "glibintl.h"
33
34
/* Property identifiers used by set_property/get_property and class_init.
 * PROP_0 is a placeholder so that every real property id is non-zero. */
enum
{
  PROP_0            = 0,
  PROP_FROM_CHARSET = 1,
  PROP_TO_CHARSET   = 2,
  PROP_USE_FALLBACK = 3
};
41
42 /**
43  * SECTION:gcharsetconverter
44  * @short_description: Convert between charsets
45  * @include: gio/gio.h
46  *
47  * #GCharsetConverter is an implementation of #GConverter based on
48  * GIConv.
49  */
50
51 static void g_charset_converter_iface_init          (GConverterIface *iface);
52 static void g_charset_converter_initable_iface_init (GInitableIface  *iface);
53
54 /**
55  * GCharsetConverter:
56  *
57  * Conversions between character sets.
58  */
59 struct _GCharsetConverter
60 {
61   GObject parent_instance;
62
63   char *from;
64   char *to;
65   GIConv iconv;
66   gboolean use_fallback;
67   guint n_fallback_errors;
68 };
69
70 G_DEFINE_TYPE_WITH_CODE (GCharsetConverter, g_charset_converter, G_TYPE_OBJECT,
71                          G_IMPLEMENT_INTERFACE (G_TYPE_CONVERTER,
72                                                 g_charset_converter_iface_init);
73                          G_IMPLEMENT_INTERFACE (G_TYPE_INITABLE,
74                                                 g_charset_converter_initable_iface_init))
75
76 static void
77 g_charset_converter_finalize (GObject *object)
78 {
79   GCharsetConverter *conv;
80
81   conv = G_CHARSET_CONVERTER (object);
82
83   g_free (conv->from);
84   g_free (conv->to);
85   if (conv->iconv)
86     g_iconv_close (conv->iconv);
87
88   G_OBJECT_CLASS (g_charset_converter_parent_class)->finalize (object);
89 }
90
91 static void
92 g_charset_converter_set_property (GObject      *object,
93                                   guint         prop_id,
94                                   const GValue *value,
95                                   GParamSpec   *pspec)
96 {
97   GCharsetConverter *conv;
98
99   conv = G_CHARSET_CONVERTER (object);
100
101   switch (prop_id)
102     {
103     case PROP_TO_CHARSET:
104       g_free (conv->to);
105       conv->to = g_value_dup_string (value);
106       break;
107
108     case PROP_FROM_CHARSET:
109       g_free (conv->from);
110       conv->from = g_value_dup_string (value);
111       break;
112
113     case PROP_USE_FALLBACK:
114       conv->use_fallback = g_value_get_boolean (value);
115       break;
116
117     default:
118       G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
119       break;
120     }
121
122 }
123
124 static void
125 g_charset_converter_get_property (GObject    *object,
126                                   guint       prop_id,
127                                   GValue     *value,
128                                   GParamSpec *pspec)
129 {
130   GCharsetConverter *conv;
131
132   conv = G_CHARSET_CONVERTER (object);
133
134   switch (prop_id)
135     {
136     case PROP_TO_CHARSET:
137       g_value_set_string (value, conv->to);
138       break;
139
140     case PROP_FROM_CHARSET:
141       g_value_set_string (value, conv->from);
142       break;
143
144     case PROP_USE_FALLBACK:
145       g_value_set_boolean (value, conv->use_fallback);
146       break;
147
148     default:
149       G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
150       break;
151     }
152 }
153
154 static void
155 g_charset_converter_class_init (GCharsetConverterClass *klass)
156 {
157   GObjectClass *gobject_class = G_OBJECT_CLASS (klass);
158
159   gobject_class->finalize = g_charset_converter_finalize;
160   gobject_class->get_property = g_charset_converter_get_property;
161   gobject_class->set_property = g_charset_converter_set_property;
162
163   g_object_class_install_property (gobject_class,
164                                    PROP_TO_CHARSET,
165                                    g_param_spec_string ("to-charset",
166                                                         P_("To Charset"),
167                                                         P_("The character encoding to convert to"),
168                                                         NULL,
169                                                         G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY |
170                                                         G_PARAM_STATIC_STRINGS));
171   g_object_class_install_property (gobject_class,
172                                    PROP_FROM_CHARSET,
173                                    g_param_spec_string ("from-charset",
174                                                         P_("From Charset"),
175                                                         P_("The character encoding to convert from"),
176                                                         NULL,
177                                                         G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY |
178                                                         G_PARAM_STATIC_STRINGS));
179   g_object_class_install_property (gobject_class,
180                                    PROP_USE_FALLBACK,
181                                    g_param_spec_boolean ("use-fallback",
182                                                          P_("Fallback enabled"),
183                                                          P_("Use fallback (of form \\<hexval>) for invalid bytes"),
184                                                          FALSE,
185                                                          G_PARAM_READWRITE |
186                                                          G_PARAM_CONSTRUCT |
187                                                          G_PARAM_STATIC_STRINGS));
188 }
189
190 static void
191 g_charset_converter_init (GCharsetConverter *local)
192 {
193 }
194
195
196 /**
197  * g_charset_converter_new:
198  * @to_charset: destination charset
199  * @from_charset: source charset
200  * @error: #GError for error reporting, or %NULL to ignore.
201  *
202  * Creates a new #GCharsetConverter.
203  *
204  * Returns: a new #GCharsetConverter or %NULL on error.
205  *
206  * Since: 2.24
207  **/
208 GCharsetConverter *
209 g_charset_converter_new (const gchar  *to_charset,
210                          const gchar  *from_charset,
211                          GError       **error)
212 {
213   GCharsetConverter *conv;
214
215   conv = g_initable_new (G_TYPE_CHARSET_CONVERTER,
216                          NULL, error,
217                          "to-charset", to_charset,
218                          "from-charset", from_charset,
219                          NULL);
220
221   return conv;
222 }
223
224 static void
225 g_charset_converter_reset (GConverter *converter)
226 {
227   GCharsetConverter *conv = G_CHARSET_CONVERTER (converter);
228
229   if (conv->iconv == NULL)
230     {
231       g_warning ("Invalid object, not initialized");
232       return;
233     }
234
235   g_iconv (conv->iconv, NULL, NULL, NULL, NULL);
236   conv->n_fallback_errors = 0;
237 }
238
239 static GConverterResult
240 g_charset_converter_convert (GConverter *converter,
241                              const void *inbuf,
242                              gsize       inbuf_size,
243                              void       *outbuf,
244                              gsize       outbuf_size,
245                              GConverterFlags flags,
246                              gsize      *bytes_read,
247                              gsize      *bytes_written,
248                              GError    **error)
249 {
250   GCharsetConverter  *conv;
251   gsize res;
252   GConverterResult ret;
253   gchar *inbufp, *outbufp;
254   gsize in_left, out_left;
255   int errsv;
256   gboolean reset;
257
258   conv = G_CHARSET_CONVERTER (converter);
259
260   if (conv->iconv == NULL)
261     {
262       g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NOT_INITIALIZED,
263                            _("Invalid object, not initialized"));
264       return G_CONVERTER_ERROR;
265     }
266
267   inbufp = (char *)inbuf;
268   outbufp = (char *)outbuf;
269   in_left = inbuf_size;
270   out_left = outbuf_size;
271   reset = FALSE;
272
273   /* if there is not input try to flush the data */
274   if (inbuf_size == 0)
275     {
276       if (flags & G_CONVERTER_INPUT_AT_END ||
277           flags & G_CONVERTER_FLUSH)
278         {
279           reset = TRUE;
280         }
281       else
282         {
283           g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_PARTIAL_INPUT,
284                                _("Incomplete multibyte sequence in input"));
285           return G_CONVERTER_ERROR;
286         }
287     }
288
289   if (reset)
290     /* call g_iconv with NULL inbuf to cleanup shift state */
291     res = g_iconv (conv->iconv,
292                    NULL, &in_left,
293                    &outbufp, &out_left);
294   else
295     res = g_iconv (conv->iconv,
296                    &inbufp, &in_left,
297                    &outbufp, &out_left);
298
299   *bytes_read = inbufp - (char *)inbuf;
300   *bytes_written = outbufp - (char *)outbuf;
301
302   /* Don't report error if we converted anything */
303   if (res == (gsize) -1 && *bytes_read == 0)
304     {
305       errsv = errno;
306
307       switch (errsv)
308         {
309         case EINVAL:
310           /* Incomplete input text */
311           g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_PARTIAL_INPUT,
312                                _("Incomplete multibyte sequence in input"));
313           break;
314
315         case E2BIG:
316           /* Not enough destination space */
317           g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NO_SPACE,
318                                _("Not enough space in destination"));
319           break;
320
321         case EILSEQ:
322           /* Invalid code sequence */
323           if (conv->use_fallback)
324             {
325               if (outbuf_size < 3)
326                 g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NO_SPACE,
327                                      _("Not enough space in destination"));
328               else
329                 {
330                   const char hex[] = "0123456789ABCDEF";
331                   guint8 v = *(guint8 *)inbuf;
332                   guint8 *out = (guint8 *)outbuf;
333                   out[0] = '\\';
334                   out[1] = hex[(v & 0xf0) >> 4];
335                   out[2] = hex[(v & 0x0f) >> 0];
336                   *bytes_read = 1;
337                   *bytes_written = 3;
338                   in_left--;
339                   conv->n_fallback_errors++;
340                   goto ok;
341                 }
342             }
343           else
344             g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_INVALID_DATA,
345                                  _("Invalid byte sequence in conversion input"));
346           break;
347
348         default:
349           g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
350                        _("Error during conversion: %s"),
351                        g_strerror (errsv));
352           break;
353         }
354       ret = G_CONVERTER_ERROR;
355     }
356   else
357     {
358     ok:
359       ret = G_CONVERTER_CONVERTED;
360
361       if (reset &&
362           (flags & G_CONVERTER_INPUT_AT_END))
363         ret = G_CONVERTER_FINISHED;
364       else if (reset &&
365                (flags & G_CONVERTER_FLUSH))
366         ret = G_CONVERTER_FLUSHED;
367     }
368
369   return ret;
370 }
371
372 /**
373  * g_charset_converter_set_use_fallback:
374  * @converter: a #GCharsetConverter
375  * @use_fallback: %TRUE to use fallbacks
376  *
377  * Sets the #GCharsetConverter:use-fallback property.
378  *
379  * Since: 2.24
380  */
381 void
382 g_charset_converter_set_use_fallback (GCharsetConverter *converter,
383                                       gboolean           use_fallback)
384 {
385   use_fallback = !!use_fallback;
386
387   if (converter->use_fallback != use_fallback)
388     {
389       converter->use_fallback = use_fallback;
390       g_object_notify (G_OBJECT (converter), "use-fallback");
391     }
392 }
393
394 /**
395  * g_charset_converter_get_use_fallback:
396  * @converter: a #GCharsetConverter
397  *
398  * Gets the #GCharsetConverter:use-fallback property.
399  *
400  * Returns: %TRUE if fallbacks are used by @converter
401  *
402  * Since: 2.24
403  */
404 gboolean
405 g_charset_converter_get_use_fallback (GCharsetConverter *converter)
406 {
407   return converter->use_fallback;
408 }
409
410 /**
411  * g_charset_converter_get_num_fallbacks:
412  * @converter: a #GCharsetConverter
413  *
414  * Gets the number of fallbacks that @converter has applied so far.
415  *
416  * Returns: the number of fallbacks that @converter has applied
417  *
418  * Since: 2.24
419  */
420 guint
421 g_charset_converter_get_num_fallbacks (GCharsetConverter *converter)
422 {
423   return converter->n_fallback_errors;
424 }
425
426 static void
427 g_charset_converter_iface_init (GConverterIface *iface)
428 {
429   iface->convert = g_charset_converter_convert;
430   iface->reset = g_charset_converter_reset;
431 }
432
433 static gboolean
434 g_charset_converter_initable_init (GInitable *initable,
435                                    GCancellable *cancellable,
436                                    GError  **error)
437 {
438   GCharsetConverter  *conv;
439
440   g_return_val_if_fail (G_IS_CHARSET_CONVERTER (initable), FALSE);
441
442   conv = G_CHARSET_CONVERTER (initable);
443
444   if (cancellable != NULL)
445     {
446       g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED,
447                            _("Cancellable initialization not supported"));
448       return FALSE;
449     }
450
451   conv->iconv =
452     g_iconv_open (conv->to, conv->from);
453
454   if (conv->iconv == (GIConv)-1)
455     {
456       if (errno == EINVAL)
457         g_set_error (error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED,
458                      _("Conversion from character set '%s' to '%s' is not supported"),
459                      conv->from, conv->to);
460       else
461         g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
462                      _("Could not open converter from '%s' to '%s'"),
463                      conv->from, conv->to);
464       return FALSE;
465     }
466
467   return TRUE;
468 }
469
470 static void
471 g_charset_converter_initable_iface_init (GInitableIface *iface)
472 {
473   iface->init = g_charset_converter_initable_init;
474 }