glib/gconvert.c

   1 /* GLIB - Library of useful routines for C programming
   2  *
   3  * gconvert.c: Convert between character sets using iconv
   4  * Copyright Red Hat Inc., 2000
   5  * Authors: Havoc Pennington <hp@redhat.com>, Owen Taylor <otaylor@redhat.com>
   6  *
   7  * This library is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2 of the License, or (at your option) any later version.
  11  *
  12  * This library is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with this library; if not, write to the
  19  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20  * Boston, MA 02111-1307, USA.
  21  */
  22
  23 #include <iconv.h>
  24 #include <errno.h>
  25 #include <string.h>
  26 #include <stdlib.h>
  27
  28 #include "glib.h"
  29 #include "config.h"
  30
  31 #ifdef G_OS_WIN32
  32 #include <windows.h>
  33 #endif
  34
  35 #include "glibintl.h"
  36
  37 GQuark
  38 g_convert_error_quark()
  39 {
  40   static GQuark quark;
  41   if (!quark)
  42     quark = g_quark_from_static_string ("g_convert_error");
  43
  44   return quark;
  45 }
  46
  47 #if defined(USE_LIBICONV) && !defined (_LIBICONV_H)
  48 #error libiconv in use but included iconv.h not from libiconv
  49 #endif
  50 #if !defined(USE_LIBICONV) && defined (_LIBICONV_H)
  51 #error libiconv not in use but included iconv.h is from libiconv
  52 #endif
  53
  54 GIConv
  55 g_iconv_open (const gchar  *to_codeset,
  56               const gchar  *from_codeset)
  57 {
  58   iconv_t cd = iconv_open (to_codeset, from_codeset);
  59
  60   return (GIConv)cd;
  61 }
  62
  63 size_t
  64 g_iconv (GIConv   converter,
  65          gchar  **inbuf,
  66          size_t  *inbytes_left,
  67          gchar  **outbuf,
  68          size_t  *outbytes_left)
  69 {
  70   iconv_t cd = (iconv_t)converter;
  71
  72   return iconv (cd, inbuf, inbytes_left, outbuf, outbytes_left);
  73 }
  74
  75 gint
  76 g_iconv_close (GIConv converter)
  77 {
  78   iconv_t cd = (iconv_t)converter;
  79
  80   return iconv_close (cd);
  81 }
  82
  83 static GIConv
  84 open_converter (const gchar *to_codeset,
  85                 const gchar *from_codeset,
  86                 GError     **error)
  87 {
  88   GIConv cd = g_iconv_open (to_codeset, from_codeset);
  89
  90   if (cd == (iconv_t) -1)
  91     {
  92       /* Something went wrong.  */
  93       if (errno == EINVAL)
  94         g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_NO_CONVERSION,
  95                      _("Conversion from character set `%s' to `%s' is not supported"),
  96                      from_codeset, to_codeset);
  97       else
  98         g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
  99                      _("Could not open converter from `%s' to `%s': %s"),
 100                      from_codeset, to_codeset, strerror (errno));
 101     }
 102
 103   return cd;
 104
 105 }
 106
 107 /**
 108  * g_convert:
 109  * @str:           the string to convert
 110  * @len:           the length of the string
 111  * @to_codeset:    name of character set into which to convert @str
 112  * @from_codeset:  character set of @str.
 113  * @bytes_read:    location to store the number of bytes in the
 114  *                 input string that were successfully converted, or %NULL.
 115  *                 Even if the conversion was succesful, this may be
 116  *                 less than len if there were partial characters
 117  *                 at the end of the input. If the error
 118  *                 G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
 119  *                 stored will the byte fofset after the last valid
 120  *                 input sequence.
 121  * @bytes_written: the stored in the output buffer (not including the
 122  *                 terminating nul.
 123  * @error:         location to store the error occuring, or %NULL to ignore
 124  *                 errors. Any of the errors in #GConvertError may occur.
 125  *
 126  * Convert a string from one character set to another.
 127  *
 128  * Return value: If the conversion was successful, a newly allocated
 129  *               NUL-terminated string, which must be freed with
 130  *               g_free. Otherwise %NULL and @error will be set.
 131  **/
 132 gchar*
 133 g_convert (const gchar *str,
 134            gint         len,
 135            const gchar *to_codeset,
 136            const gchar *from_codeset,
 137            gint        *bytes_read,
 138            gint        *bytes_written,
 139            GError     **error)
 140 {
 141   gchar *dest;
 142   gchar *outp;
 143   const gchar *p;
 144   size_t inbytes_remaining;
 145   size_t outbytes_remaining;
 146   size_t err;
 147   GIConv cd;
 148   size_t outbuf_size;
 149   gboolean have_error = FALSE;
 150
 151   g_return_val_if_fail (str != NULL, NULL);
 152   g_return_val_if_fail (to_codeset != NULL, NULL);
 153   g_return_val_if_fail (from_codeset != NULL, NULL);
 154
 155   cd = open_converter (to_codeset, from_codeset, error);
 156
 157   if (cd == (GIConv) -1)
 158     {
 159       if (bytes_read)
 160         *bytes_read = 0;
 161
 162       if (bytes_written)
 163         *bytes_written = 0;
 164
 165       return NULL;
 166     }
 167
 168   if (len < 0)
 169     len = strlen (str);
 170
 171   p = str;
 172   inbytes_remaining = len;
 173
 174   /* Due to a GLIBC bug, round outbuf_size up to a multiple of 4 */
 175   /* + 1 for nul in case len == 1 */
 176   outbuf_size = ((len + 3) & ~3) + 1;
 177
 178   outbytes_remaining = outbuf_size - 1; /* -1 for nul */
 179   outp = dest = g_malloc (outbuf_size);
 180
 181  again:
 182
 183   err = g_iconv (cd, (char **)&p, &inbytes_remaining, &outp, &outbytes_remaining);
 184
 185   if (err == (size_t) -1)
 186     {
 187       switch (errno)
 188         {
 189         case EINVAL:
 190           /* Incomplete text, do not report an error */
 191           break;
 192         case E2BIG:
 193           {
 194             size_t used = outp - dest;
 195
 196             /* glibc's iconv can return E2BIG even if there is space
 197              * remaining if an internal buffer is exhausted. The
 198              * folllowing is a heuristic to catch this. The 16 is
 199              * pretty arbitrary.
 200              */
 201             if (used + 16 > outbuf_size)
 202               {
 203                 outbuf_size = (outbuf_size - 1) * 2 + 1;
 204                 dest = g_realloc (dest, outbuf_size);
 205
 206                 outp = dest + used;
 207                 outbytes_remaining = outbuf_size - used - 1; /* -1 for nul */
 208               }
 209
 210             goto again;
 211           }
 212         case EILSEQ:
 213           g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
 214                        _("Invalid byte sequence in conversion input"));
 215           have_error = TRUE;
 216           break;
 217         default:
 218           g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
 219                        _("Error during conversion: %s"),
 220                        strerror (errno));
 221           have_error = TRUE;
 222           break;
 223         }
 224     }
 225
 226   *outp = '\0';
 227
 228   g_iconv_close (cd);
 229
 230   if (bytes_read)
 231     *bytes_read = p - str;
 232   else
 233     {
 234       if ((p - str) != len)
 235         {
 236           if (!have_error)
 237             {
 238               g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,
 239                            _("Partial character sequence at end of input"));
 240               have_error = TRUE;
 241             }
 242         }
 243     }
 244
 245   if (bytes_written)
 246     *bytes_written = outp - dest;       /* Doesn't include '\0' */
 247
 248   if (have_error)
 249     {
 250       g_free (dest);
 251       return NULL;
 252     }
 253   else
 254     return dest;
 255 }
 256
 257 /**
 258  * g_convert_with_fallback:
 259  * @str:          the string to convert
 260  * @len:          the length of the string
 261  * @to_codeset:   name of character set into which to convert @str
 262  * @from_codeset: character set of @str.
 263  * @fallback:     UTF-8 string to use in place of character not
 264  *                present in the target encoding. (This must be
 265  *                in the target encoding), if %NULL, characters
 266  *                not in the target encoding will be represented
 267  *                as Unicode escapes \x{XXXX} or \x{XXXXXX}.
 268  * @bytes_read:   location to store the number of bytes in the
 269  *                input string that were successfully converted, or %NULL.
 270  *                Even if the conversion was succesful, this may be
 271  *                less than len if there were partial characters
 272  *                at the end of the input. If the error
 273  *                G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
 274  *                stored will the byte fofset after the last valid
 275  *                input sequence.
 276  * @bytes_written: the stored in the output buffer (not including the
 277  *                 terminating nul.
 278  * @error:        location to store the error occuring, or %NULL to ignore
 279  *                errors. Any of the errors in #GConvertError may occur.
 280  *
 281  * Convert a string from one character set to another, possibly
 282  * including fallback sequences for characters not representable
 283  * in the output. Note that it is not guaranteed that the specification
 284  * for the fallback sequences in @fallback will be honored. Some
 285  * systems may do a approximate conversion from @from_codeset
 286  * to @to_codeset in their iconv() functions, in which case GLib
 287  * will simply return that approximate conversion.
 288  *
 289  * Return value: If the conversion was successful, a newly allocated
 290  *               NUL-terminated string, which must be freed with
 291  *               g_free. Otherwise %NULL and @error will be set.
 292  **/
 293 gchar*
 294 g_convert_with_fallback (const gchar *str,
 295                          gint         len,
 296                          const gchar *to_codeset,
 297                          const gchar *from_codeset,
 298                          gchar       *fallback,
 299                          gint        *bytes_read,
 300                          gint        *bytes_written,
 301                          GError     **error)
 302 {
 303   gchar *utf8;
 304   gchar *dest;
 305   gchar *outp;
 306   const gchar *insert_str = NULL;
 307   const gchar *p;
 308   int inbytes_remaining;
 309   const gchar *save_p = NULL;
 310   size_t save_inbytes = 0;
 311   size_t outbytes_remaining;
 312   size_t err;
 313   GIConv cd;
 314   size_t outbuf_size;
 315   gboolean have_error = FALSE;
 316   gboolean done = FALSE;
 317
 318   GError *local_error = NULL;
 319
 320   g_return_val_if_fail (str != NULL, NULL);
 321   g_return_val_if_fail (to_codeset != NULL, NULL);
 322   g_return_val_if_fail (from_codeset != NULL, NULL);
 323
 324   if (len < 0)
 325     len = strlen (str);
 326
 327   /* Try an exact conversion; we only proceed if this fails
 328    * due to an illegal sequence in the input string.
 329    */
 330   dest = g_convert (str, len, to_codeset, from_codeset,
 331                     bytes_read, bytes_written, &local_error);
 332   if (!local_error)
 333     return dest;
 334
 335   if (!g_error_matches (local_error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE))
 336     {
 337       g_propagate_error (error, local_error);
 338       return NULL;
 339     }
 340   else
 341     g_error_free (local_error);
 342
 343   local_error = NULL;
 344
 345   /* No go; to proceed, we need a converter from "UTF-8" to
 346    * to_codeset, and the string as UTF-8.
 347    */
 348   cd = open_converter (to_codeset, "UTF-8", error);
 349   if (cd == (GIConv) -1)
 350     {
 351       if (bytes_read)
 352         *bytes_read = 0;
 353
 354       if (bytes_written)
 355         *bytes_written = 0;
 356
 357       return NULL;
 358     }
 359
 360   utf8 = g_convert (str, len, "UTF-8", from_codeset,
 361                     bytes_read, &inbytes_remaining, error);
 362   if (!utf8)
 363     return NULL;
 364
 365   /* Now the heart of the code. We loop through the UTF-8 string, and
 366    * whenever we hit an offending character, we form fallback, convert
 367    * the fallback to the target codeset, and then go back to
 368    * converting the original string after finishing with the fallback.
 369    *
 370    * The variables save_p and save_inbytes store the input state
 371    * for the original string while we are converting the fallback
 372    */
 373   p = utf8;
 374   /* Due to a GLIBC bug, round outbuf_size up to a multiple of 4 */
 375   /* + 1 for nul in case len == 1 */
 376   outbuf_size = ((len + 3) & ~3) + 1;
 377   outbytes_remaining = outbuf_size - 1; /* -1 for nul */
 378   outp = dest = g_malloc (outbuf_size);
 379
 380   while (!done && !have_error)
 381     {
 382       size_t inbytes_tmp = inbytes_remaining;
 383       err = g_iconv (cd, (char **)&p, &inbytes_tmp, &outp, &outbytes_remaining);
 384       inbytes_remaining = inbytes_tmp;
 385
 386       if (err == (size_t) -1)
 387         {
 388           switch (errno)
 389             {
 390             case EINVAL:
 391               g_assert_not_reached();
 392               break;
 393             case E2BIG:
 394               {
 395                 size_t used = outp - dest;
 396
 397                 /* glibc's iconv can return E2BIG even if there is space
 398                  * remaining if an internal buffer is exhausted. The
 399                  * folllowing is a heuristic to catch this. The 16 is
 400                  * pretty arbitrary.
 401                  */
 402                 if (used + 16 > outbuf_size)
 403                   {
 404                     outbuf_size = (outbuf_size - 1) * 2 + 1;
 405                     dest = g_realloc (dest, outbuf_size);
 406
 407                     outp = dest + used;
 408                     outbytes_remaining = outbuf_size - used - 1; /* -1 for nul */
 409                   }
 410
 411                 break;
 412               }
 413             case EILSEQ:
 414               if (save_p)
 415                 {
 416                   /* Error converting fallback string - fatal
 417                    */
 418                   g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
 419                                _("Cannot convert fallback '%s' to codeset '%s'"),
 420                                insert_str, to_codeset);
 421                   have_error = TRUE;
 422                   break;
 423                 }
 424               else
 425                 {
 426                   if (!fallback)
 427                     {
 428                       gunichar ch = g_utf8_get_char (p);
 429                       insert_str = g_strdup_printf ("\\x{%0*X}",
 430                                                     (ch < 0x10000) ? 4 : 6,
 431                                                     ch);
 432                     }
 433                   else
 434                     insert_str = fallback;
 435
 436                   save_p = g_utf8_next_char (p);
 437                   save_inbytes = inbytes_remaining - (save_p - p);
 438                   p = insert_str;
 439                   inbytes_remaining = strlen (p);
 440                 }
 441               break;
 442             default:
 443               g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
 444                            _("Error during conversion: %s"),
 445                            strerror (errno));
 446               have_error = TRUE;
 447               break;
 448             }
 449         }
 450       else
 451         {
 452           if (save_p)
 453             {
 454               if (!fallback)
 455                 g_free ((gchar *)insert_str);
 456               p = save_p;
 457               inbytes_remaining = save_inbytes;
 458               save_p = NULL;
 459             }
 460           else
 461             done = TRUE;
 462         }
 463     }
 464
 465   /* Cleanup
 466    */
 467   *outp = '\0';
 468
 469   g_iconv_close (cd);
 470
 471   if (bytes_written)
 472     *bytes_written = outp - str;        /* Doesn't include '\0' */
 473
 474   g_free (utf8);
 475
 476   if (have_error)
 477     {
 478       if (save_p && !fallback)
 479         g_free ((gchar *)insert_str);
 480       g_free (dest);
 481       return NULL;
 482     }
 483   else
 484     return dest;
 485 }
 486
 487 /*
 488  * g_locale_to_utf8
 489  *
 490  *
 491  */
 492
 493 /**
 494  * g_locale_to_utf8:
 495  * @opsysstring:   a string in the encoding of the current locale
 496  * @len:           the length of the string, or -1 if the string is
 497  *                 NULL-terminated.
 498  * @bytes_read:    location to store the number of bytes in the
 499  *                 input string that were successfully converted, or %NULL.
 500  *                 Even if the conversion was succesful, this may be
 501  *                 less than len if there were partial characters
 502  *                 at the end of the input. If the error
 503  *                 G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
 504  *                 stored will the byte fofset after the last valid
 505  *                 input sequence.
 506  * @bytes_written: the stored in the output buffer (not including the
 507  *                 terminating nul.
 508  * @error: location to store the error occuring, or %NULL to ignore
 509  *                 errors. Any of the errors in #GConvertError may occur.
 510  *
 511  * Converts a string which is in the encoding used for strings by
 512  * the C runtime (usually the same as that used by the operating
 513  * system) in the current locale into a UTF-8 string.
 514  *
 515  * Return value: The converted string, or %NULL on an error.
 516  **/
 517 gchar *
 518 g_locale_to_utf8 (const gchar  *opsysstring,
 519                   gint          len,
 520                   gint         *bytes_read,
 521                   gint         *bytes_written,
 522                   GError      **error)
 523 {
 524 #ifdef G_OS_WIN32
 525
 526   gint i, clen, total_len, wclen, first;
 527   wchar_t *wcs, wc;
 528   gchar *result, *bp;
 529   const wchar_t *wcp;
 530
 531   if (len == -1)
 532     len = strlen (opsysstring);
 533
 534   wcs = g_new (wchar_t, len);
 535   wclen = MultiByteToWideChar (CP_ACP, 0, opsysstring, len, wcs, len);
 536
 537   wcp = wcs;
 538   total_len = 0;
 539   for (i = 0; i < wclen; i++)
 540     {
 541       wc = *wcp++;
 542
 543       if (wc < 0x80)
 544         total_len += 1;
 545       else if (wc < 0x800)
 546         total_len += 2;
 547       else if (wc < 0x10000)
 548         total_len += 3;
 549       else if (wc < 0x200000)
 550         total_len += 4;
 551       else if (wc < 0x4000000)
 552         total_len += 5;
 553       else
 554         total_len += 6;
 555     }
 556
 557   result = g_malloc (total_len + 1);
 558
 559   wcp = wcs;
 560   bp = result;
 561   for (i = 0; i < wclen; i++)
 562     {
 563       wc = *wcp++;
 564
 565       if (wc < 0x80)
 566         {
 567           first = 0;
 568           clen = 1;
 569         }
 570       else if (wc < 0x800)
 571         {
 572           first = 0xc0;
 573           clen = 2;
 574         }
 575       else if (wc < 0x10000)
 576         {
 577           first = 0xe0;
 578           clen = 3;
 579         }
 580       else if (wc < 0x200000)
 581         {
 582           first = 0xf0;
 583           clen = 4;
 584         }
 585       else if (wc < 0x4000000)
 586         {
 587           first = 0xf8;
 588           clen = 5;
 589         }
 590       else
 591         {
 592           first = 0xfc;
 593           clen = 6;
 594         }
 595
 596       /* Woo-hoo! */
 597       switch (clen)
 598         {
 599         case 6: bp[5] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */
 600         case 5: bp[4] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */
 601         case 4: bp[3] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */
 602         case 3: bp[2] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */
 603         case 2: bp[1] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */
 604         case 1: bp[0] = wc | first;
 605         }
 606
 607       bp += clen;
 608     }
 609   *bp = 0;
 610
 611   g_free (wcs);
 612
 613   if (bytes_read)
 614     *bytes_read = len;
 615   if (bytes_written)
 616     *bytes_written = total_len;
 617
 618   return result;
 619
 620 #else
 621
 622   char *charset, *str;
 623
 624   if (g_get_charset (&charset))
 625     return g_strdup (opsysstring);
 626
 627   str = g_convert (opsysstring, len,
 628                    "UTF-8", charset, bytes_read, bytes_written, error);
 629
 630   return str;
 631 #endif
 632 }
 633
 634 /**
 635  * g_locale_from_utf8:
 636  * @utf8string:    a UTF-8 encoded string
 637  * @len:           the length of the string, or -1 if the string is
 638  *                 NULL-terminated.
 639  * @bytes_read:    location to store the number of bytes in the
 640  *                 input string that were successfully converted, or %NULL.
 641  *                 Even if the conversion was succesful, this may be
 642  *                 less than len if there were partial characters
 643  *                 at the end of the input. If the error
 644  *                 G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
 645  *                 stored will the byte fofset after the last valid
 646  *                 input sequence.
 647  * @bytes_written: the stored in the output buffer (not including the
 648  *                 terminating nul.
 649  * @error: location to store the error occuring, or %NULL to ignore
 650  *                 errors. Any of the errors in #GConvertError may occur.
 651  *
 652  * Converts a string from UTF-8 to the encoding used for strings by
 653  * the C runtime (usually the same as that used by the operating
 654  * system) in the current locale.
 655  *
 656  * Return value: The converted string, or %NULL on an error.
 657  **/
 658 gchar *
 659 g_locale_from_utf8 (const gchar *utf8string,
 660                     gint         len,
 661                     gint        *bytes_read,
 662                     gint        *bytes_written,
 663                     GError     **error)
 664 {
 665 #ifdef G_OS_WIN32
 666
 667   gint i, mask, clen, mblen;
 668   wchar_t *wcs, *wcp;
 669   gchar *result;
 670   guchar *cp, *end, c;
 671   gint n;
 672
 673   if (len == -1)
 674     len = strlen (utf8string);
 675
 676   /* First convert to wide chars */
 677   cp = (guchar *) utf8string;
 678   end = cp + len;
 679   n = 0;
 680   wcs = g_new (wchar_t, len + 1);
 681   wcp = wcs;
 682   while (cp != end)
 683     {
 684       mask = 0;
 685       c = *cp;
 686
 687       if (c < 0x80)
 688         {
 689           clen = 1;
 690           mask = 0x7f;
 691         }
 692       else if ((c & 0xe0) == 0xc0)
 693         {
 694           clen = 2;
 695           mask = 0x1f;
 696         }
 697       else if ((c & 0xf0) == 0xe0)
 698         {
 699           clen = 3;
 700           mask = 0x0f;
 701         }
 702       else if ((c & 0xf8) == 0xf0)
 703         {
 704           clen = 4;
 705           mask = 0x07;
 706         }
 707       else if ((c & 0xfc) == 0xf8)
 708         {
 709           clen = 5;
 710           mask = 0x03;
 711         }
 712       else if ((c & 0xfc) == 0xfc)
 713         {
 714           clen = 6;
 715           mask = 0x01;
 716         }
 717       else
 718         {
 719           g_free (wcs);
 720           return NULL;
 721         }
 722
 723       if (cp + clen > end)
 724         {
 725           g_free (wcs);
 726           return NULL;
 727         }
 728
 729       *wcp = (cp[0] & mask);
 730       for (i = 1; i < clen; i++)
 731         {
 732           if ((cp[i] & 0xc0) != 0x80)
 733             {
 734               g_free (wcs);
 735               return NULL;
 736             }
 737           *wcp <<= 6;
 738           *wcp |= (cp[i] & 0x3f);
 739         }
 740
 741       cp += clen;
 742       wcp++;
 743       n++;
 744     }
 745   if (cp != end)
 746     {
 747       g_free (wcs);
 748       return NULL;
 749     }
 750
 751   /* n is the number of wide chars constructed */
 752
 753   /* Convert to a string in the current ANSI codepage */
 754
 755   result = g_new (gchar, 3 * n + 1);
 756   mblen = WideCharToMultiByte (CP_ACP, 0, wcs, n, result, 3*n, NULL, NULL);
 757   result[mblen] = 0;
 758   g_free (wcs);
 759
 760   if (bytes_read)
 761     *bytes_read = len;
 762   if (bytes_written)
 763     *bytes_written = mblen;
 764
 765   return result;
 766
 767 #else
 768
 769   gchar *charset, *str;
 770
 771   if (g_get_charset (&charset))
 772     return g_strdup (utf8string);
 773
 774   str = g_convert (utf8string, strlen (utf8string),
 775                    charset, "UTF-8", bytes_read, bytes_written, error);
 776
 777   return str;
 778
 779 #endif
 780 }
 781
 782 /**
 783  * g_filename_to_utf8:
 784  * @opsysstring:   a string in the encoding for filenames
 785  * @len:           the length of the string, or -1 if the string is
 786  *                 NULL-terminated.
 787  * @bytes_read:    location to store the number of bytes in the
 788  *                 input string that were successfully converted, or %NULL.
 789  *                 Even if the conversion was succesful, this may be
 790  *                 less than len if there were partial characters
 791  *                 at the end of the input. If the error
 792  *                 G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
 793  *                 stored will the byte fofset after the last valid
 794  *                 input sequence.
 795  * @bytes_written: the stored in the output buffer (not including the
 796  *                 terminating nul.
 797  * @error: location to store the error occuring, or %NULL to ignore
 798  *                 errors. Any of the errors in #GConvertError may occur.
 799  *
 800  * Converts a string which is in the encoding used for filenames
 801  * into a UTF-8 string.
 802  *
 803  * Return value: The converted string, or %NULL on an error.
 804  **/
 805 gchar*
 806 g_filename_to_utf8 (const gchar *opsysstring,
 807                     gint         len,
 808                     gint        *bytes_read,
 809                     gint        *bytes_written,
 810                     GError     **error)
 811 {
 812 #ifdef G_OS_WIN32
 813   return g_locale_to_utf8 (opsysstring, len,
 814                            bytes_read, bytes_written,
 815                            error);
 816 #else
 817   if (getenv ("G_BROKEN_FILENAMES"))
 818     return g_locale_to_utf8 (opsysstring, len,
 819                              bytes_read, bytes_written,
 820                              error);
 821
 822   if (bytes_read || bytes_written)
 823     {
 824       gint len = strlen (opsysstring);
 825
 826       if (bytes_read)
 827         *bytes_read = len;
 828       if (bytes_written)
 829         *bytes_written = len;
 830     }
 831
 832   if (len < 0)
 833     return g_strdup (opsysstring);
 834   else
 835     return g_strndup (opsysstring, len);
 836 #endif
 837 }
 838
 839 /**
 840  * g_filename_from_utf8:
 841  * @utf8string:    a UTF-8 encoded string
 842  * @len:           the length of the string, or -1 if the string is
 843  *                 NULL-terminated.
 844  * @bytes_read:    location to store the number of bytes in the
 845  *                 input string that were successfully converted, or %NULL.
 846  *                 Even if the conversion was succesful, this may be
 847  *                 less than len if there were partial characters
 848  *                 at the end of the input. If the error
 849  *                 G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
 850  *                 stored will the byte fofset after the last valid
 851  *                 input sequence.
 852  * @bytes_written: the stored in the output buffer (not including the
 853  *                 terminating nul.
 854  * @error: location to store the error occuring, or %NULL to ignore
 855  *                 errors. Any of the errors in #GConvertError may occur.
 856  *
 857  * Converts a string from UTF-8 to the encoding used for filenames.
 858  *
 859  * Return value: The converted string, or %NULL on an error.
 860  **/
 861 gchar*
 862 g_filename_from_utf8 (const gchar *utf8string,
 863                       gint         len,
 864                       gint        *bytes_read,
 865                       gint        *bytes_written,
 866                       GError     **error)
 867 {
 868 #ifdef G_OS_WIN32
 869   return g_locale_from_utf8 (utf8string, len,
 870                              bytes_read, bytes_written,
 871                              error);
 872 #else
 873   if (getenv ("G_BROKEN_FILENAMES"))
 874     return g_locale_from_utf8 (utf8string, len,
 875                                bytes_read, bytes_written,
 876                                error);
 877
 878   if (bytes_read || bytes_written)
 879     {
 880       gint len = strlen (utf8string);
 881
 882       if (bytes_read)
 883         *bytes_read = len;
 884       if (bytes_written)
 885         *bytes_written = len;
 886     }
 887
 888   if (len < 0)
 889     return g_strdup (utf8string);
 890   else
 891     return g_strndup (utf8string, len);
 892 #endif
 893 }
 894
 895