Add g_str_is_ascii()
[platform/upstream/glib.git] / gio / gcharsetconverter.c
1 /* GIO - GLib Input, Output and Streaming Library
2  *
3  * Copyright (C) 2009 Red Hat, Inc.
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General
16  * Public License along with this library; if not, write to the
17  * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
18  * Boston, MA 02111-1307, USA.
19  *
20  * Author: Alexander Larsson <alexl@redhat.com>
21  */
22
23 #include "config.h"
24
25 #include "gcharsetconverter.h"
26
27 #include <errno.h>
28
29 #include "ginitable.h"
30 #include "gioerror.h"
31 #include "glibintl.h"
32
33
/* Identifiers for the GObject properties installed in
 * g_charset_converter_class_init().  PROP_0 is an unused placeholder, so the
 * real property ids start at 1. */
enum {
  PROP_0 = 0,
  PROP_FROM_CHARSET,
  PROP_TO_CHARSET,
  PROP_USE_FALLBACK
};
40
41 /**
42  * SECTION:gcharsetconverter
43  * @short_description: Convert between charsets
44  * @include: gio/gio.h
45  *
46  * #GCharsetConverter is an implementation of #GConverter based on
47  * GIConv.
48  */
49
50 static void g_charset_converter_iface_init          (GConverterIface *iface);
51 static void g_charset_converter_initable_iface_init (GInitableIface  *iface);
52
53 /**
54  * GCharsetConverter:
55  *
56  * Conversions between character sets.
57  */
58 struct _GCharsetConverter
59 {
60   GObject parent_instance;
61
62   char *from;
63   char *to;
64   GIConv iconv;
65   gboolean use_fallback;
66   guint n_fallback_errors;
67 };
68
69 G_DEFINE_TYPE_WITH_CODE (GCharsetConverter, g_charset_converter, G_TYPE_OBJECT,
70                          G_IMPLEMENT_INTERFACE (G_TYPE_CONVERTER,
71                                                 g_charset_converter_iface_init);
72                          G_IMPLEMENT_INTERFACE (G_TYPE_INITABLE,
73                                                 g_charset_converter_initable_iface_init))
74
75 static void
76 g_charset_converter_finalize (GObject *object)
77 {
78   GCharsetConverter *conv;
79
80   conv = G_CHARSET_CONVERTER (object);
81
82   g_free (conv->from);
83   g_free (conv->to);
84   if (conv->iconv)
85     g_iconv_close (conv->iconv);
86
87   G_OBJECT_CLASS (g_charset_converter_parent_class)->finalize (object);
88 }
89
90 static void
91 g_charset_converter_set_property (GObject      *object,
92                                   guint         prop_id,
93                                   const GValue *value,
94                                   GParamSpec   *pspec)
95 {
96   GCharsetConverter *conv;
97
98   conv = G_CHARSET_CONVERTER (object);
99
100   switch (prop_id)
101     {
102     case PROP_TO_CHARSET:
103       g_free (conv->to);
104       conv->to = g_value_dup_string (value);
105       break;
106
107     case PROP_FROM_CHARSET:
108       g_free (conv->from);
109       conv->from = g_value_dup_string (value);
110       break;
111
112     case PROP_USE_FALLBACK:
113       conv->use_fallback = g_value_get_boolean (value);
114       break;
115
116     default:
117       G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
118       break;
119     }
120
121 }
122
123 static void
124 g_charset_converter_get_property (GObject    *object,
125                                   guint       prop_id,
126                                   GValue     *value,
127                                   GParamSpec *pspec)
128 {
129   GCharsetConverter *conv;
130
131   conv = G_CHARSET_CONVERTER (object);
132
133   switch (prop_id)
134     {
135     case PROP_TO_CHARSET:
136       g_value_set_string (value, conv->to);
137       break;
138
139     case PROP_FROM_CHARSET:
140       g_value_set_string (value, conv->from);
141       break;
142
143     case PROP_USE_FALLBACK:
144       g_value_set_boolean (value, conv->use_fallback);
145       break;
146
147     default:
148       G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
149       break;
150     }
151 }
152
153 static void
154 g_charset_converter_class_init (GCharsetConverterClass *klass)
155 {
156   GObjectClass *gobject_class = G_OBJECT_CLASS (klass);
157
158   gobject_class->finalize = g_charset_converter_finalize;
159   gobject_class->get_property = g_charset_converter_get_property;
160   gobject_class->set_property = g_charset_converter_set_property;
161
162   g_object_class_install_property (gobject_class,
163                                    PROP_TO_CHARSET,
164                                    g_param_spec_string ("to-charset",
165                                                         P_("To Charset"),
166                                                         P_("The character encoding to convert to"),
167                                                         NULL,
168                                                         G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY |
169                                                         G_PARAM_STATIC_STRINGS));
170   g_object_class_install_property (gobject_class,
171                                    PROP_FROM_CHARSET,
172                                    g_param_spec_string ("from-charset",
173                                                         P_("From Charset"),
174                                                         P_("The character encoding to convert from"),
175                                                         NULL,
176                                                         G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY |
177                                                         G_PARAM_STATIC_STRINGS));
178   g_object_class_install_property (gobject_class,
179                                    PROP_USE_FALLBACK,
180                                    g_param_spec_boolean ("use-fallback",
181                                                          P_("Fallback enabled"),
182                                                          P_("Use fallback (of form \\<hexval>) for invalid bytes"),
183                                                          FALSE,
184                                                          G_PARAM_READWRITE |
185                                                          G_PARAM_CONSTRUCT |
186                                                          G_PARAM_STATIC_STRINGS));
187 }
188
189 static void
190 g_charset_converter_init (GCharsetConverter *local)
191 {
192 }
193
194
195 /**
196  * g_charset_converter_new:
197  * @to_charset: destination charset
198  * @from_charset: source charset
199  * @error: #GError for error reporting, or %NULL to ignore.
200  *
201  * Creates a new #GCharsetConverter.
202  *
203  * Returns: a new #GCharsetConverter or %NULL on error.
204  *
205  * Since: 2.24
206  **/
207 GCharsetConverter *
208 g_charset_converter_new (const gchar  *to_charset,
209                          const gchar  *from_charset,
210                          GError       **error)
211 {
212   GCharsetConverter *conv;
213
214   conv = g_initable_new (G_TYPE_CHARSET_CONVERTER,
215                          NULL, error,
216                          "to-charset", to_charset,
217                          "from-charset", from_charset,
218                          NULL);
219
220   return conv;
221 }
222
223 static void
224 g_charset_converter_reset (GConverter *converter)
225 {
226   GCharsetConverter *conv = G_CHARSET_CONVERTER (converter);
227
228   if (conv->iconv == NULL)
229     {
230       g_warning ("Invalid object, not initialized");
231       return;
232     }
233
234   g_iconv (conv->iconv, NULL, NULL, NULL, NULL);
235   conv->n_fallback_errors = 0;
236 }
237
238 static GConverterResult
239 g_charset_converter_convert (GConverter *converter,
240                              const void *inbuf,
241                              gsize       inbuf_size,
242                              void       *outbuf,
243                              gsize       outbuf_size,
244                              GConverterFlags flags,
245                              gsize      *bytes_read,
246                              gsize      *bytes_written,
247                              GError    **error)
248 {
249   GCharsetConverter  *conv;
250   gsize res;
251   GConverterResult ret;
252   gchar *inbufp, *outbufp;
253   gsize in_left, out_left;
254   int errsv;
255   gboolean reset;
256
257   conv = G_CHARSET_CONVERTER (converter);
258
259   if (conv->iconv == NULL)
260     {
261       g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NOT_INITIALIZED,
262                            _("Invalid object, not initialized"));
263       return G_CONVERTER_ERROR;
264     }
265
266   inbufp = (char *)inbuf;
267   outbufp = (char *)outbuf;
268   in_left = inbuf_size;
269   out_left = outbuf_size;
270   reset = FALSE;
271
272   /* if there is not input try to flush the data */
273   if (inbuf_size == 0)
274     {
275       if (flags & G_CONVERTER_INPUT_AT_END ||
276           flags & G_CONVERTER_FLUSH)
277         {
278           reset = TRUE;
279         }
280       else
281         {
282           g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_PARTIAL_INPUT,
283                                _("Incomplete multibyte sequence in input"));
284           return G_CONVERTER_ERROR;
285         }
286     }
287
288   if (reset)
289     /* call g_iconv with NULL inbuf to cleanup shift state */
290     res = g_iconv (conv->iconv,
291                    NULL, &in_left,
292                    &outbufp, &out_left);
293   else
294     res = g_iconv (conv->iconv,
295                    &inbufp, &in_left,
296                    &outbufp, &out_left);
297
298   *bytes_read = inbufp - (char *)inbuf;
299   *bytes_written = outbufp - (char *)outbuf;
300
301   /* Don't report error if we converted anything */
302   if (res == (gsize) -1 && *bytes_read == 0)
303     {
304       errsv = errno;
305
306       switch (errsv)
307         {
308         case EINVAL:
309           /* Incomplete input text */
310           g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_PARTIAL_INPUT,
311                                _("Incomplete multibyte sequence in input"));
312           break;
313
314         case E2BIG:
315           /* Not enough destination space */
316           g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NO_SPACE,
317                                _("Not enough space in destination"));
318           break;
319
320         case EILSEQ:
321           /* Invalid code sequence */
322           if (conv->use_fallback)
323             {
324               if (outbuf_size < 3)
325                 g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NO_SPACE,
326                                      _("Not enough space in destination"));
327               else
328                 {
329                   const char hex[] = "0123456789ABCDEF";
330                   guint8 v = *(guint8 *)inbuf;
331                   guint8 *out = (guint8 *)outbuf;
332                   out[0] = '\\';
333                   out[1] = hex[(v & 0xf0) >> 4];
334                   out[2] = hex[(v & 0x0f) >> 0];
335                   *bytes_read = 1;
336                   *bytes_written = 3;
337                   in_left--;
338                   conv->n_fallback_errors++;
339                   goto ok;
340                 }
341             }
342           else
343             g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_INVALID_DATA,
344                                  _("Invalid byte sequence in conversion input"));
345           break;
346
347         default:
348           g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
349                        _("Error during conversion: %s"),
350                        g_strerror (errsv));
351           break;
352         }
353       ret = G_CONVERTER_ERROR;
354     }
355   else
356     {
357     ok:
358       ret = G_CONVERTER_CONVERTED;
359
360       if (reset &&
361           (flags & G_CONVERTER_INPUT_AT_END))
362         ret = G_CONVERTER_FINISHED;
363       else if (reset &&
364                (flags & G_CONVERTER_FLUSH))
365         ret = G_CONVERTER_FLUSHED;
366     }
367
368   return ret;
369 }
370
371 /**
372  * g_charset_converter_set_use_fallback:
373  * @converter: a #GCharsetConverter
374  * @use_fallback: %TRUE to use fallbacks
375  *
376  * Sets the #GCharsetConverter:use-fallback property.
377  *
378  * Since: 2.24
379  */
380 void
381 g_charset_converter_set_use_fallback (GCharsetConverter *converter,
382                                       gboolean           use_fallback)
383 {
384   use_fallback = !!use_fallback;
385
386   if (converter->use_fallback != use_fallback)
387     {
388       converter->use_fallback = use_fallback;
389       g_object_notify (G_OBJECT (converter), "use-fallback");
390     }
391 }
392
393 /**
394  * g_charset_converter_get_use_fallback:
395  * @converter: a #GCharsetConverter
396  *
397  * Gets the #GCharsetConverter:use-fallback property.
398  *
399  * Returns: %TRUE if fallbacks are used by @converter
400  *
401  * Since: 2.24
402  */
403 gboolean
404 g_charset_converter_get_use_fallback (GCharsetConverter *converter)
405 {
406   return converter->use_fallback;
407 }
408
409 /**
410  * g_charset_converter_get_num_fallbacks:
411  * @converter: a #GCharsetConverter
412  *
413  * Gets the number of fallbacks that @converter has applied so far.
414  *
415  * Returns: the number of fallbacks that @converter has applied
416  *
417  * Since: 2.24
418  */
419 guint
420 g_charset_converter_get_num_fallbacks (GCharsetConverter *converter)
421 {
422   return converter->n_fallback_errors;
423 }
424
425 static void
426 g_charset_converter_iface_init (GConverterIface *iface)
427 {
428   iface->convert = g_charset_converter_convert;
429   iface->reset = g_charset_converter_reset;
430 }
431
432 static gboolean
433 g_charset_converter_initable_init (GInitable *initable,
434                                    GCancellable *cancellable,
435                                    GError  **error)
436 {
437   GCharsetConverter  *conv;
438
439   g_return_val_if_fail (G_IS_CHARSET_CONVERTER (initable), FALSE);
440
441   conv = G_CHARSET_CONVERTER (initable);
442
443   if (cancellable != NULL)
444     {
445       g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED,
446                            _("Cancellable initialization not supported"));
447       return FALSE;
448     }
449
450   conv->iconv =
451     g_iconv_open (conv->to, conv->from);
452
453   if (conv->iconv == (GIConv)-1)
454     {
455       if (errno == EINVAL)
456         g_set_error (error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED,
457                      _("Conversion from character set '%s' to '%s' is not supported"),
458                      conv->from, conv->to);
459       else
460         g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
461                      _("Could not open converter from '%s' to '%s'"),
462                      conv->from, conv->to);
463       return FALSE;
464     }
465
466   return TRUE;
467 }
468
469 static void
470 g_charset_converter_initable_iface_init (GInitableIface *iface)
471 {
472   iface->init = g_charset_converter_initable_init;
473 }