common/utf8conv.c

   1 /* utf8conv.c -  UTF8 character set conversion
   2  * Copyright (C) 1994, 1998, 1999, 2000, 2001, 2003, 2006,
   3  *               2008, 2010  Free Software Foundation, Inc.
   4  *
   5  * This file is part of GnuPG.
   6  *
   7  * GnuPG is free software; you can redistribute it and/or modify it
   8  * under the terms of either
   9  *
  10  *   - the GNU Lesser General Public License as published by the Free
  11  *     Software Foundation; either version 3 of the License, or (at
  12  *     your option) any later version.
  13  *
  14  * or
  15  *
  16  *   - the GNU General Public License as published by the Free
  17  *     Software Foundation; either version 2 of the License, or (at
  18  *     your option) any later version.
  19  *
  20  * or both in parallel, as here.
  21  *
  22  * GnuPG is distributed in the hope that it will be useful, but
  23  * WITHOUT ANY WARRANTY; without even the implied warranty of
  24  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  25  * General Public License for more details.
  26  *
  27  * You should have received a copies of the GNU General Public License
  28  * and the GNU Lesser General Public License along with this program;
  29  * if not, see <http://www.gnu.org/licenses/>.
  30  */
  31
  32 #include <config.h>
  33 #include <stdlib.h>
  34 #include <string.h>
  35 #include <stdarg.h>
  36 #include <ctype.h>
  37 #ifdef HAVE_LANGINFO_CODESET
  38 #include <langinfo.h>
  39 #endif
  40 #include <errno.h>
  41 #ifndef HAVE_ANDROID_SYSTEM
  42 # include <iconv.h>
  43 #endif
  44
  45 #include "util.h"
  46 #include "common-defs.h"
  47 #include "i18n.h"
  48 #include "stringhelp.h"
  49 #include "utf8conv.h"
  50
  51 #ifndef MB_LEN_MAX
  52 #define MB_LEN_MAX 16
  53 #endif
  54
  55 static const char *active_charset_name = "iso-8859-1";
  56 static int no_translation;     /* Set to true if we let simply pass through. */
  57 static int use_iconv;          /* iconv conversion functions required. */
  58
  59
  60 #ifdef HAVE_ANDROID_SYSTEM
  61 /* Fake stuff to get things building.  */
  62 typedef void *iconv_t;
  63 #define ICONV_CONST
  64
  65 static iconv_t
  66 iconv_open (const char *tocode, const char *fromcode)
  67 {
  68   (void)tocode;
  69   (void)fromcode;
  70   return (iconv_t)(-1);
  71 }
  72
  73 static size_t
  74 iconv (iconv_t cd, char **inbuf, size_t *inbytesleft,
  75        char **outbuf, size_t *outbytesleft)
  76 {
  77   (void)cd;
  78   (void)inbuf;
  79   (void)inbytesleft;
  80   (void)outbuf;
  81   (void)outbytesleft;
  82   return (size_t)(0);
  83 }
  84
  85 static int
  86 iconv_close (iconv_t cd)
  87 {
  88   (void)cd;
  89   return 0;
  90 }
  91 #endif /*HAVE_ANDROID_SYSTEM*/
  92
  93
  94 /* Error handler for iconv failures. This is needed to not clutter the
  95    output with repeated diagnostics about a missing conversion. */
  96 static void
  97 handle_iconv_error (const char *to, const char *from, int use_fallback)
  98 {
  99   if (errno == EINVAL)
 100     {
 101       static int shown1, shown2;
 102       int x;
 103
 104       if (to && !strcmp (to, "utf-8"))
 105         {
 106           x = shown1;
 107           shown1 = 1;
 108         }
 109       else
 110         {
 111           x = shown2;
 112           shown2 = 1;
 113         }
 114
 115       if (!x)
 116         log_info (_("conversion from '%s' to '%s' not available\n"),
 117                   from, to);
 118     }
 119   else
 120     {
 121       static int shown;
 122
 123       if (!shown)
 124         log_info (_("iconv_open failed: %s\n"), strerror (errno));
 125       shown = 1;
 126     }
 127
 128   if (use_fallback)
 129     {
 130       /* To avoid further error messages we fallback to UTF-8 for the
 131          native encoding.  Nowadays this seems to be the best bet in
 132          case of errors from iconv or nl_langinfo.  */
 133       active_charset_name = "utf-8";
 134       no_translation = 0;
 135       use_iconv = 0;
 136     }
 137 }
 138
 139
 140
 141 int
 142 set_native_charset (const char *newset)
 143 {
 144   const char *full_newset;
 145
 146   if (!newset)
 147     {
 148 #ifdef HAVE_ANDROID_SYSTEM
 149       newset = "utf-8";
 150 #elif defined HAVE_W32_SYSTEM
 151       static char codepage[30];
 152       unsigned int cpno;
 153       const char *aliases;
 154
 155       /* We are a console program thus we need to use the
 156          GetConsoleOutputCP function and not the the GetACP which
 157          would give the codepage for a GUI program.  Note this is not
 158          a bulletproof detection because GetConsoleCP might return a
 159          different one for console input.  Not sure how to cope with
 160          that.  If the console Code page is not known we fall back to
 161          the system code page.  */
 162 #ifndef HAVE_W32CE_SYSTEM
 163       cpno = GetConsoleOutputCP ();
 164       if (!cpno)
 165 #endif
 166         cpno = GetACP ();
 167       sprintf (codepage, "CP%u", cpno );
 168       /* Resolve alias.  We use a long string string and not the usual
 169          array to optimize if the code is taken to a DSO.  Taken from
 170          libiconv 1.9.2. */
 171       newset = codepage;
 172       for (aliases = ("CP936"   "\0" "GBK" "\0"
 173                       "CP1361"  "\0" "JOHAB" "\0"
 174                       "CP20127" "\0" "ASCII" "\0"
 175                       "CP20866" "\0" "KOI8-R" "\0"
 176                       "CP21866" "\0" "KOI8-RU" "\0"
 177                       "CP28591" "\0" "ISO-8859-1" "\0"
 178                       "CP28592" "\0" "ISO-8859-2" "\0"
 179                       "CP28593" "\0" "ISO-8859-3" "\0"
 180                       "CP28594" "\0" "ISO-8859-4" "\0"
 181                       "CP28595" "\0" "ISO-8859-5" "\0"
 182                       "CP28596" "\0" "ISO-8859-6" "\0"
 183                       "CP28597" "\0" "ISO-8859-7" "\0"
 184                       "CP28598" "\0" "ISO-8859-8" "\0"
 185                       "CP28599" "\0" "ISO-8859-9" "\0"
 186                       "CP28605" "\0" "ISO-8859-15" "\0"
 187                       "CP65001" "\0" "UTF-8" "\0");
 188            *aliases;
 189            aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
 190         {
 191           if (!strcmp (codepage, aliases) ||(*aliases == '*' && !aliases[1]))
 192             {
 193               newset = aliases + strlen (aliases) + 1;
 194               break;
 195             }
 196         }
 197
 198 #else /*!HAVE_W32_SYSTEM && !HAVE_ANDROID_SYSTEM*/
 199
 200 #ifdef HAVE_LANGINFO_CODESET
 201       newset = nl_langinfo (CODESET);
 202 #else /*!HAVE_LANGINFO_CODESET*/
 203       /* Try to get the used charset from environment variables.  */
 204       static char codepage[30];
 205       const char *lc, *dot, *mod;
 206
 207       strcpy (codepage, "iso-8859-1");
 208       lc = getenv ("LC_ALL");
 209       if (!lc || !*lc)
 210         {
 211           lc = getenv ("LC_CTYPE");
 212           if (!lc || !*lc)
 213             lc = getenv ("LANG");
 214         }
 215       if (lc && *lc)
 216         {
 217           dot = strchr (lc, '.');
 218           if (dot)
 219             {
 220               mod = strchr (++dot, '@');
 221               if (!mod)
 222                 mod = dot + strlen (dot);
 223               if (mod - dot < sizeof codepage && dot != mod)
 224                 {
 225                   memcpy (codepage, dot, mod - dot);
 226                   codepage [mod - dot] = 0;
 227                 }
 228             }
 229         }
 230       newset = codepage;
 231 #endif /*!HAVE_LANGINFO_CODESET*/
 232 #endif /*!HAVE_W32_SYSTEM && !HAVE_ANDROID_SYSTEM*/
 233     }
 234
 235   full_newset = newset;
 236   if (strlen (newset) > 3 && !ascii_memcasecmp (newset, "iso", 3))
 237     {
 238       newset += 3;
 239       if (*newset == '-' || *newset == '_')
 240         newset++;
 241     }
 242
 243   /* Note that we silently assume that plain ASCII is actually meant
 244      as Latin-1.  This makes sense because many Unix system don't have
 245      their locale set up properly and thus would get annoying error
 246      messages and we have to handle all the "bug" reports. Latin-1 has
 247      always been the character set used for 8 bit characters on Unix
 248      systems. */
 249   if ( !*newset
 250        || !ascii_strcasecmp (newset, "8859-1" )
 251        || !ascii_strcasecmp (newset, "646" )
 252        || !ascii_strcasecmp (newset, "ASCII" )
 253        || !ascii_strcasecmp (newset, "ANSI_X3.4-1968" )
 254        )
 255     {
 256       active_charset_name = "iso-8859-1";
 257       no_translation = 0;
 258       use_iconv = 0;
 259     }
 260   else if ( !ascii_strcasecmp (newset, "utf8" )
 261             || !ascii_strcasecmp(newset, "utf-8") )
 262     {
 263       active_charset_name = "utf-8";
 264       no_translation = 1;
 265       use_iconv = 0;
 266     }
 267   else
 268     {
 269       iconv_t cd;
 270
 271       cd = iconv_open (full_newset, "utf-8");
 272       if (cd == (iconv_t)-1)
 273         {
 274           handle_iconv_error (full_newset, "utf-8", 0);
 275           return -1;
 276         }
 277       iconv_close (cd);
 278       cd = iconv_open ("utf-8", full_newset);
 279       if (cd == (iconv_t)-1)
 280         {
 281           handle_iconv_error ("utf-8", full_newset, 0);
 282           return -1;
 283         }
 284       iconv_close (cd);
 285       active_charset_name = full_newset;
 286       no_translation = 0;
 287       use_iconv = 1;
 288     }
 289   return 0;
 290 }
 291
 292 const char *
 293 get_native_charset ()
 294 {
 295   return active_charset_name;
 296 }
 297
 298 /* Return true if the native charset is utf-8.  */
 299 int
 300 is_native_utf8 (void)
 301 {
 302   return no_translation;
 303 }
 304
 305
 306 /* Convert string, which is in native encoding to UTF8 and return a
 307    new allocated UTF-8 string.  This function terminates the process
 308    on memory shortage.  */
 309 char *
 310 native_to_utf8 (const char *orig_string)
 311 {
 312   const unsigned char *string = (const unsigned char *)orig_string;
 313   const unsigned char *s;
 314   char *buffer;
 315   unsigned char *p;
 316   size_t length = 0;
 317
 318   if (no_translation)
 319     {
 320       /* Already utf-8 encoded. */
 321       buffer = xstrdup (orig_string);
 322     }
 323   else if (!use_iconv)
 324     {
 325       /* For Latin-1 we can avoid the iconv overhead. */
 326       for (s = string; *s; s++)
 327         {
 328           length++;
 329           if (*s & 0x80)
 330             length++;
 331         }
 332       buffer = xmalloc (length + 1);
 333       for (p = (unsigned char *)buffer, s = string; *s; s++)
 334         {
 335           if ( (*s & 0x80 ))
 336             {
 337               *p++ = 0xc0 | ((*s >> 6) & 3);
 338               *p++ = 0x80 | (*s & 0x3f);
 339             }
 340           else
 341             *p++ = *s;
 342         }
 343       *p = 0;
 344     }
 345   else
 346     {
 347       /* Need to use iconv.  */
 348       iconv_t cd;
 349       const char *inptr;
 350       char *outptr;
 351       size_t inbytes, outbytes;
 352
 353       cd = iconv_open ("utf-8", active_charset_name);
 354       if (cd == (iconv_t)-1)
 355         {
 356           handle_iconv_error ("utf-8", active_charset_name, 1);
 357           return native_to_utf8 (string);
 358         }
 359
 360       for (s=string; *s; s++ )
 361         {
 362           length++;
 363           if ((*s & 0x80))
 364             length += 5; /* We may need up to 6 bytes for the utf8 output. */
 365         }
 366       buffer = xmalloc (length + 1);
 367
 368       inptr = string;
 369       inbytes = strlen (string);
 370       outptr = buffer;
 371       outbytes = length;
 372       if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,
 373                   &outptr, &outbytes) == (size_t)-1)
 374         {
 375           static int shown;
 376
 377           if (!shown)
 378             log_info (_("conversion from '%s' to '%s' failed: %s\n"),
 379                       active_charset_name, "utf-8", strerror (errno));
 380           shown = 1;
 381           /* We don't do any conversion at all but use the strings as is. */
 382           strcpy (buffer, string);
 383         }
 384       else /* Success.  */
 385         {
 386           *outptr = 0;
 387           /* We could realloc the buffer now but I doubt that it makes
 388              much sense given that it will get freed anyway soon
 389              after.  */
 390         }
 391       iconv_close (cd);
 392     }
 393   return buffer;
 394 }
 395
 396
 397
 398 static char *
 399 do_utf8_to_native (const char *string, size_t length, int delim,
 400                    int with_iconv)
 401 {
 402   int nleft;
 403   int i;
 404   unsigned char encbuf[8];
 405   int encidx;
 406   const unsigned char *s;
 407   size_t n;
 408   char *buffer = NULL;
 409   char *p = NULL;
 410   unsigned long val = 0;
 411   size_t slen;
 412   int resync = 0;
 413
 414   /* First pass (p==NULL): count the extended utf-8 characters.  */
 415   /* Second pass (p!=NULL): create string.  */
 416   for (;;)
 417     {
 418       for (slen = length, nleft = encidx = 0, n = 0,
 419              s = (const unsigned char *)string;
 420            slen;
 421            s++, slen--)
 422         {
 423           if (resync)
 424             {
 425               if (!(*s < 128 || (*s >= 0xc0 && *s <= 0xfd)))
 426                 {
 427                   /* Still invalid. */
 428                   if (p)
 429                     {
 430                       sprintf (p, "\\x%02x", *s);
 431                       p += 4;
 432                     }
 433                   n += 4;
 434                   continue;
 435                 }
 436               resync = 0;
 437             }
 438           if (!nleft)
 439             {
 440               if (!(*s & 0x80))
 441                 {
 442                   /* Plain ascii. */
 443                   if ( delim != -1
 444                        && (*s < 0x20 || *s == 0x7f || *s == delim
 445                            || (delim && *s == '\\')))
 446                     {
 447                       n++;
 448                       if (p)
 449                         *p++ = '\\';
 450                       switch (*s)
 451                         {
 452                         case '\n': n++; if ( p ) *p++ = 'n'; break;
 453                         case '\r': n++; if ( p ) *p++ = 'r'; break;
 454                         case '\f': n++; if ( p ) *p++ = 'f'; break;
 455                         case '\v': n++; if ( p ) *p++ = 'v'; break;
 456                         case '\b': n++; if ( p ) *p++ = 'b'; break;
 457                         case    0: n++; if ( p ) *p++ = '0'; break;
 458                         default:
 459                           n += 3;
 460                           if (p)
 461                             {
 462                               sprintf (p, "x%02x", *s);
 463                               p += 3;
 464                             }
 465                           break;
 466                         }
 467                     }
 468                   else
 469                     {
 470                       if (p)
 471                         *p++ = *s;
 472                       n++;
 473                     }
 474                 }
 475               else if ((*s & 0xe0) == 0xc0) /* 110x xxxx */
 476                 {
 477                   val = *s & 0x1f;
 478                   nleft = 1;
 479                   encidx = 0;
 480                   encbuf[encidx++] = *s;
 481                 }
 482               else if ((*s & 0xf0) == 0xe0) /* 1110 xxxx */
 483                 {
 484                   val = *s & 0x0f;
 485                   nleft = 2;
 486                   encidx = 0;
 487                   encbuf[encidx++] = *s;
 488                 }
 489               else if ((*s & 0xf8) == 0xf0) /* 1111 0xxx */
 490                 {
 491                   val = *s & 0x07;
 492                   nleft = 3;
 493                   encidx = 0;
 494                   encbuf[encidx++] = *s;
 495                 }
 496               else if ((*s & 0xfc) == 0xf8) /* 1111 10xx */
 497                 {
 498                   val = *s & 0x03;
 499                   nleft = 4;
 500                   encidx = 0;
 501                   encbuf[encidx++] = *s;
 502                 }
 503               else if ((*s & 0xfe) == 0xfc) /* 1111 110x */
 504                 {
 505                   val = *s & 0x01;
 506                   nleft = 5;
 507                   encidx = 0;
 508                   encbuf[encidx++] = *s;
 509                 }
 510               else /* Invalid encoding: print as \xNN. */
 511                 {
 512                   if (p)
 513                     {
 514                       sprintf (p, "\\x%02x", *s);
 515                       p += 4;
 516                     }
 517                   n += 4;
 518                   resync = 1;
 519                 }
 520             }
 521           else if (*s < 0x80 || *s >= 0xc0) /* Invalid utf-8 */
 522             {
 523               if (p)
 524                 {
 525                   for (i = 0; i < encidx; i++)
 526                     {
 527                       sprintf (p, "\\x%02x", encbuf[i]);
 528                       p += 4;
 529                     }
 530                   sprintf (p, "\\x%02x", *s);
 531                   p += 4;
 532                 }
 533               n += 4 + 4 * encidx;
 534               nleft = 0;
 535               encidx = 0;
 536               resync = 1;
 537             }
 538           else
 539             {
 540               encbuf[encidx++] = *s;
 541               val <<= 6;
 542               val |= *s & 0x3f;
 543               if (!--nleft)  /* Ready. */
 544                 {
 545                   if (no_translation)
 546                     {
 547                       if (p)
 548                         {
 549                           for (i = 0; i < encidx; i++)
 550                             *p++ = encbuf[i];
 551                         }
 552                       n += encidx;
 553                       encidx = 0;
 554                     }
 555                   else if (with_iconv)
 556                     {
 557                       /* Our strategy for using iconv is a bit strange
 558                          but it better keeps compatibility with
 559                          previous versions in regard to how invalid
 560                          encodings are displayed.  What we do is to
 561                          keep the utf-8 as is and have the real
 562                          translation step then at the end.  Yes, I
 563                          know that this is ugly.  However we are short
 564                          of the 1.4 release and for this branch we
 565                          should not mess too much around with iconv
 566                          things.  One reason for this is that we don't
 567                          know enough about non-GNU iconv
 568                          implementation and want to minimize the risk
 569                          of breaking the code on too many platforms.  */
 570                         if ( p )
 571                           {
 572                             for (i=0; i < encidx; i++ )
 573                               *p++ = encbuf[i];
 574                           }
 575                         n += encidx;
 576                         encidx = 0;
 577                     }
 578                   else  /* Latin-1 case. */
 579                     {
 580                       if (val >= 0x80 && val < 256)
 581                         {
 582                           /* We can simply print this character */
 583                           n++;
 584                           if (p)
 585                             *p++ = val;
 586                         }
 587                       else
 588                         {
 589                           /* We do not have a translation: print utf8. */
 590                           if (p)
 591                             {
 592                               for (i = 0; i < encidx; i++)
 593                                 {
 594                                   sprintf (p, "\\x%02x", encbuf[i]);
 595                                   p += 4;
 596                                 }
 597                             }
 598                           n += encidx * 4;
 599                           encidx = 0;
 600                         }
 601                     }
 602                 }
 603
 604             }
 605         }
 606       if (!buffer)
 607         {
 608           /* Allocate the buffer after the first pass. */
 609           buffer = p = xmalloc (n + 1);
 610         }
 611       else if (with_iconv)
 612         {
 613           /* Note: See above for comments.  */
 614           iconv_t cd;
 615           const char *inptr;
 616           char *outbuf, *outptr;
 617           size_t inbytes, outbytes;
 618
 619           *p = 0;  /* Terminate the buffer. */
 620
 621           cd = iconv_open (active_charset_name, "utf-8");
 622           if (cd == (iconv_t)-1)
 623             {
 624               handle_iconv_error (active_charset_name, "utf-8", 1);
 625               xfree (buffer);
 626               return utf8_to_native (string, length, delim);
 627             }
 628
 629           /* Allocate a new buffer large enough to hold all possible
 630              encodings. */
 631           n = p - buffer + 1;
 632           inbytes = n - 1;;
 633           inptr = buffer;
 634           outbytes = n * MB_LEN_MAX;
 635           if (outbytes / MB_LEN_MAX != n)
 636             BUG (); /* Actually an overflow. */
 637           outbuf = outptr = xmalloc (outbytes);
 638           if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,
 639                       &outptr, &outbytes) == (size_t)-1)
 640             {
 641               static int shown;
 642
 643               if (!shown)
 644                 log_info (_("conversion from '%s' to '%s' failed: %s\n"),
 645                           "utf-8", active_charset_name, strerror (errno));
 646               shown = 1;
 647               /* Didn't worked out.  Try again but without iconv.  */
 648               xfree (buffer);
 649               buffer = NULL;
 650               xfree (outbuf);
 651               outbuf = do_utf8_to_native (string, length, delim, 0);
 652             }
 653             else /* Success.  */
 654               {
 655                 *outptr = 0; /* Make sure it is a string. */
 656                 /* We could realloc the buffer now but I doubt that it
 657                    makes much sense given that it will get freed
 658                    anyway soon after.  */
 659                 xfree (buffer);
 660               }
 661           iconv_close (cd);
 662           return outbuf;
 663         }
 664       else /* Not using iconv. */
 665         {
 666           *p = 0; /* Make sure it is a string. */
 667           return buffer;
 668         }
 669     }
 670 }
 671
 672 /* Convert string, which is in UTF-8 to native encoding.  Replace
 673    illegal encodings by some "\xnn" and quote all control
 674    characters. A character with value DELIM will always be quoted, it
 675    must be a vanilla ASCII character.  A DELIM value of -1 is special:
 676    it disables all quoting of control characters.  This function
 677    terminates the process on memory shortage.  */
 678 char *
 679 utf8_to_native (const char *string, size_t length, int delim)
 680 {
 681   return do_utf8_to_native (string, length, delim, use_iconv);
 682 }
 683
 684
 685
 686
 687 /* Wrapper function for iconv_open, required for W32 as we dlopen that
 688    library on that system.  */
 689 jnlib_iconv_t
 690 jnlib_iconv_open (const char *tocode, const char *fromcode)
 691 {
 692   return (jnlib_iconv_t)iconv_open (tocode, fromcode);
 693 }
 694
 695
 696 /* Wrapper function for iconv, required for W32 as we dlopen that
 697    library on that system.  */
 698 size_t
 699 jnlib_iconv (jnlib_iconv_t cd,
 700              const char **inbuf, size_t *inbytesleft,
 701              char **outbuf, size_t *outbytesleft)
 702 {
 703   return iconv ((iconv_t)cd, (char**)inbuf, inbytesleft, outbuf, outbytesleft);
 704 }
 705
 706 /* Wrapper function for iconv_close, required for W32 as we dlopen that
 707    library on that system.  */
 708 int
 709 jnlib_iconv_close (jnlib_iconv_t cd)
 710 {
 711   return iconv_close ((iconv_t)cd);
 712 }
 713
 714
 715 #ifdef HAVE_W32_SYSTEM
 716 /* Return a malloced string encoded in UTF-8 from the wide char input
 717    string STRING.  Caller must free this value.  Returns NULL and sets
 718    ERRNO on failure.  Calling this function with STRING set to NULL is
 719    not defined.  */
 720 char *
 721 wchar_to_utf8 (const wchar_t *string)
 722 {
 723   int n;
 724   char *result;
 725
 726   n = WideCharToMultiByte (CP_UTF8, 0, string, -1, NULL, 0, NULL, NULL);
 727   if (n < 0)
 728     {
 729       gpg_err_set_errno (EINVAL);
 730       return NULL;
 731     }
 732
 733   result = xtrymalloc (n+1);
 734   if (!result)
 735     return NULL;
 736
 737   n = WideCharToMultiByte (CP_UTF8, 0, string, -1, result, n, NULL, NULL);
 738   if (n < 0)
 739     {
 740       xfree (result);
 741       gpg_err_set_errno (EINVAL);
 742       result = NULL;
 743     }
 744   return result;
 745 }
 746
 747
 748 /* Return a malloced wide char string from an UTF-8 encoded input
 749    string STRING.  Caller must free this value.  Returns NULL and sets
 750    ERRNO on failure.  Calling this function with STRING set to NULL is
 751    not defined.  */
 752 wchar_t *
 753 utf8_to_wchar (const char *string)
 754 {
 755   int n;
 756   size_t nbytes;
 757   wchar_t *result;
 758
 759   n = MultiByteToWideChar (CP_UTF8, 0, string, -1, NULL, 0);
 760   if (n < 0)
 761     {
 762       gpg_err_set_errno (EINVAL);
 763       return NULL;
 764     }
 765
 766   nbytes = (size_t)(n+1) * sizeof(*result);
 767   if (nbytes / sizeof(*result) != (n+1))
 768     {
 769       gpg_err_set_errno (ENOMEM);
 770       return NULL;
 771     }
 772   result = xtrymalloc (nbytes);
 773   if (!result)
 774     return NULL;
 775
 776   n = MultiByteToWideChar (CP_UTF8, 0, string, -1, result, n);
 777   if (n < 0)
 778     {
 779       xfree (result);
 780       gpg_err_set_errno (EINVAL);
 781       result = NULL;
 782     }
 783   return result;
 784 }
 785 #endif /*HAVE_W32_SYSTEM*/